You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2015/07/28 21:49:08 UTC

[5/5] hive git commit: HIVE-10799. Refactor the SearchArgumentFactory to remove the AST-specific factory. (omalley reviewed by prasanth_j)

HIVE-10799. Refactor the SearchArgumentFactory to remove the AST-specific factory. (omalley reviewed by prasanth_j)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c178a6e9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c178a6e9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c178a6e9

Branch: refs/heads/master
Commit: c178a6e9d12055e5bde634123ca58f243ae39477
Parents: d2ee354
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Jul 28 12:47:59 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Jul 28 12:47:59 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/common/type/HiveDecimal.java    |    2 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |    4 +-
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |   30 +-
 .../read/ParquetRecordReaderWrapper.java        |    7 +-
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java  |  439 +++
 .../hive/ql/io/sarg/SearchArgumentFactory.java  |   25 +-
 .../hive/ql/io/sarg/SearchArgumentImpl.java     | 1000 ++----
 .../hive/ql/io/orc/TestInputOutputFormat.java   |    9 +-
 .../hadoop/hive/ql/io/orc/TestOrcFile.java      |   11 +-
 .../hive/ql/io/orc/TestRecordReaderImpl.java    |   63 +-
 .../parquet/TestParquetRecordReaderWrapper.java |  155 +
 .../ql/io/sarg/TestConvertAstToSearchArg.java   | 2863 +++++++++++++++++
 .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 2888 +-----------------
 .../hadoop/hive/ql/io/sarg/PredicateLeaf.java   |   33 +-
 .../hadoop/hive/ql/io/sarg/SearchArgument.java  |   34 +-
 .../hive/serde2/io/HiveDecimalWritable.java     |    8 +
 16 files changed, 4080 insertions(+), 3491 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java b/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
index a8215f2..f14fc2d 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
+++ b/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
@@ -75,7 +75,7 @@ public class HiveDecimal implements Comparable<HiveDecimal> {
   public static HiveDecimal create(String dec) {
     BigDecimal bd;
     try {
-      bd = new BigDecimal(dec);
+      bd = new BigDecimal(dec.trim());
     } catch (NumberFormatException ex) {
       return null;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 3a9e64e..4e6dd7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -54,10 +54,10 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
+import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -305,7 +305,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
       options.searchArgument(null, null);
       return;
     }
-    SearchArgument sarg = SearchArgumentFactory.createFromConf(conf);
+    SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
     if (sarg == null) {
       LOG.debug("No ORC pushdown predicate");
       options.searchArgument(null, null);

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 4f79e37..f85420d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -47,8 +47,8 @@ import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReader;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
 import org.apache.hadoop.io.Text;
@@ -523,7 +523,8 @@ class RecordReaderImpl implements RecordReader {
         result = TruthValue.YES_NO_NULL;
       }
     } else if (predObj instanceof String || predObj instanceof Text ||
-        predObj instanceof HiveDecimal || predObj instanceof BigDecimal) {
+        predObj instanceof HiveDecimalWritable ||
+        predObj instanceof BigDecimal) {
       if (bf.testString(predObj.toString())) {
         result = TruthValue.YES_NO_NULL;
       }
@@ -560,11 +561,7 @@ class RecordReaderImpl implements RecordReader {
   }
 
   private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) {
-    if (obj != null) {
-      if (obj instanceof ExprNodeConstantDesc) {
-        obj = ((ExprNodeConstantDesc) obj).getValue();
-      }
-    } else {
+    if (obj == null) {
       return null;
     }
     switch (type) {
@@ -588,20 +585,23 @@ class RecordReaderImpl implements RecordReader {
         break;
       case DECIMAL:
         if (obj instanceof Boolean) {
-          return ((Boolean) obj).booleanValue() ? HiveDecimal.ONE : HiveDecimal.ZERO;
+          return new HiveDecimalWritable(((Boolean) obj).booleanValue() ?
+              HiveDecimal.ONE : HiveDecimal.ZERO);
         } else if (obj instanceof Integer) {
-          return HiveDecimal.create(((Integer) obj).intValue());
+          return new HiveDecimalWritable(((Integer) obj).intValue());
         } else if (obj instanceof Long) {
-          return HiveDecimal.create(((Long) obj));
+          return new HiveDecimalWritable(((Long) obj));
         } else if (obj instanceof Float || obj instanceof Double ||
             obj instanceof String) {
-          return HiveDecimal.create(obj.toString());
+          return new HiveDecimalWritable(obj.toString());
         } else if (obj instanceof BigDecimal) {
-          return HiveDecimal.create((BigDecimal) obj);
+          return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) obj));
         } else if (obj instanceof HiveDecimal) {
+          return new HiveDecimalWritable((HiveDecimal) obj);
+        } else if (obj instanceof HiveDecimalWritable) {
           return obj;
         } else if (obj instanceof Timestamp) {
-          return HiveDecimal.create(
+          return new HiveDecimalWritable(
               new Double(new TimestampWritable((Timestamp) obj).getDouble()).toString());
         }
         break;
@@ -641,12 +641,16 @@ class RecordReaderImpl implements RecordReader {
       case TIMESTAMP:
         if (obj instanceof Timestamp) {
           return obj;
+        } else if (obj instanceof Integer) {
+          return TimestampWritable.longToTimestamp(((Number) obj).longValue(), false);
         } else if (obj instanceof Float) {
           return TimestampWritable.doubleToTimestamp(((Float) obj).doubleValue());
         } else if (obj instanceof Double) {
           return TimestampWritable.doubleToTimestamp(((Double) obj).doubleValue());
         } else if (obj instanceof HiveDecimal) {
           return TimestampWritable.decimalToTimestamp((HiveDecimal) obj);
+        } else if (obj instanceof HiveDecimalWritable) {
+          return TimestampWritable.decimalToTimestamp(((HiveDecimalWritable) obj).getHiveDecimal());
         } else if (obj instanceof Date) {
           return new Timestamp(((Date) obj).getTime());
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index a64ec06..49e52da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -27,10 +27,10 @@ import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
 import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
 import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
+import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.io.ArrayWritable;
@@ -149,7 +149,7 @@ public class ParquetRecordReaderWrapper  implements RecordReader<NullWritable, A
     }
 
     SearchArgument sarg =
-        SearchArgumentFactory.create(Utilities.deserializeExpression
+        ConvertAstToSearchArg.create(Utilities.deserializeExpression
             (serializedPushdown));
     FilterPredicate p = toFilterPredicate(sarg);
     if (p != null) {
@@ -316,8 +316,7 @@ public class ParquetRecordReaderWrapper  implements RecordReader<NullWritable, A
    * @return translate the sarg into a filter predicate
    */
   public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
-    return translate(sarg.getExpression(),
-        sarg.getLeaves());
+    return translate(sarg.getExpression(), sarg.getLeaves());
   }
 
   private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
new file mode 100644
index 0000000..eb8c03f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
@@ -0,0 +1,439 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.List;
+
+public class ConvertAstToSearchArg {
+  private static final Log LOG = LogFactory.getLog(ConvertAstToSearchArg.class);
+  private final SearchArgument.Builder builder =
+      SearchArgumentFactory.newBuilder();
+
+  /**
+   * Builds the expression and leaf list from the original predicate.
+   * @param expression the expression to translate.
+   */
+  ConvertAstToSearchArg(ExprNodeGenericFuncDesc expression) {
+    parse(expression);
+  }
+
+  /**
+   * Build the search argument from the expression.
+   * @return the search argument
+   */
+  public SearchArgument buildSearchArgument() {
+    return builder.build();
+  }
+
+  /**
+   * Get the type of the given expression node.
+   * @param expr the expression to get the type of
+   * @return int, string, or float or null if we don't know the type
+   */
+  private static PredicateLeaf.Type getType(ExprNodeDesc expr) {
+    TypeInfo type = expr.getTypeInfo();
+    if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+      switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
+        case BYTE:
+        case SHORT:
+        case INT:
+          return PredicateLeaf.Type.INTEGER;
+        case LONG:
+          return PredicateLeaf.Type.LONG;
+        case CHAR:
+        case VARCHAR:
+        case STRING:
+          return PredicateLeaf.Type.STRING;
+        case FLOAT:
+        case DOUBLE:
+          return PredicateLeaf.Type.FLOAT;
+        case DATE:
+          return PredicateLeaf.Type.DATE;
+        case TIMESTAMP:
+          return PredicateLeaf.Type.TIMESTAMP;
+        case DECIMAL:
+          return PredicateLeaf.Type.DECIMAL;
+        case BOOLEAN:
+          return PredicateLeaf.Type.BOOLEAN;
+        default:
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Get the column name referenced in the expression. It must be at the top
+   * level of this expression and there must be exactly one column.
+   * @param expr the expression to look in
+   * @param variable the slot the variable is expected in
+   * @return the column name or null if there isn't exactly one column
+   */
+  private static String getColumnName(ExprNodeGenericFuncDesc expr,
+                                      int variable) {
+    List<ExprNodeDesc> children = expr.getChildren();
+    if (variable < 0 || variable >= children.size()) {
+      return null;
+    }
+    ExprNodeDesc child = children.get(variable);
+    if (child instanceof ExprNodeColumnDesc) {
+      return ((ExprNodeColumnDesc) child).getColumn();
+    }
+    return null;
+  }
+
+  private static Object boxLiteral(ExprNodeConstantDesc constantDesc,
+                                   PredicateLeaf.Type type) {
+    Object lit = constantDesc.getValue();
+    if (lit == null) {
+      return null;
+    }
+    switch (type) {
+      case INTEGER:
+        return ((Number) lit).intValue();
+      case LONG:
+        return ((Number) lit).longValue();
+      case STRING:
+        if (lit instanceof HiveChar) {
+          lit = ((HiveChar) lit).getPaddedValue();
+        } else if (lit instanceof String) {
+          return lit;
+        } else {
+          return lit.toString();
+        }
+      case FLOAT:
+        if (lit instanceof Float) {
+          // converting a float directly to a double causes annoying conversion
+          // problems
+          return Double.parseDouble(lit.toString());
+        } else {
+          return ((Number) lit).doubleValue();
+        }
+      case TIMESTAMP:
+        return Timestamp.valueOf(lit.toString());
+      case DATE:
+        return Date.valueOf(lit.toString());
+      case DECIMAL:
+        LOG.warn("boxing " + lit);
+        return new HiveDecimalWritable(lit.toString());
+      case BOOLEAN:
+        return lit;
+      default:
+        throw new IllegalArgumentException("Unknown literal " + type);
+    }
+  }
+
+  /**
+   * Find the child that is the literal.
+   * @param expr the parent node to check
+   * @param type the type of the expression
+   * @return the literal boxed if found or null
+   */
+  private static Object findLiteral(ExprNodeGenericFuncDesc expr,
+                                    PredicateLeaf.Type type) {
+    List<ExprNodeDesc> children = expr.getChildren();
+    if (children.size() != 2) {
+      return null;
+    }
+    Object result = null;
+    for(ExprNodeDesc child: children) {
+      if (child instanceof ExprNodeConstantDesc) {
+        if (result != null) {
+          return null;
+        }
+        result = boxLiteral((ExprNodeConstantDesc) child, type);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Return the boxed literal at the given position
+   * @param expr the parent node
+   * @param type the type of the expression
+   * @param position the child position to check
+   * @return the boxed literal if found otherwise null
+   */
+  private static Object getLiteral(ExprNodeGenericFuncDesc expr,
+                                   PredicateLeaf.Type type,
+                                   int position) {
+    List<ExprNodeDesc> children = expr.getChildren();
+    Object child = children.get(position);
+    if (child instanceof ExprNodeConstantDesc) {
+      return boxLiteral((ExprNodeConstantDesc) child, type);
+    }
+    return null;
+  }
+
+  private static Object[] getLiteralList(ExprNodeGenericFuncDesc expr,
+                                         PredicateLeaf.Type type,
+                                         int start) {
+    List<ExprNodeDesc> children = expr.getChildren();
+    Object[] result = new Object[children.size() - start];
+
+    // ignore the first child, since it is the variable
+    int posn = 0;
+    for(ExprNodeDesc child: children.subList(start, children.size())) {
+      if (child instanceof ExprNodeConstantDesc) {
+        result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type);
+      } else {
+        // if we get some non-literals, we need to punt
+        return null;
+      }
+    }
+    return result;
+  }
+
+  private void createLeaf(PredicateLeaf.Operator operator,
+                          ExprNodeGenericFuncDesc expression,
+                          int variable) {
+    String columnName = getColumnName(expression, variable);
+    if (columnName == null) {
+      builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
+      return;
+    }
+    PredicateLeaf.Type type = getType(expression.getChildren().get(variable));
+    if (type == null) {
+      builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
+      return;
+    }
+
+    // if the variable was on the right, we need to swap things around
+    boolean needSwap = false;
+    if (variable != 0) {
+      if (operator == PredicateLeaf.Operator.LESS_THAN) {
+        needSwap = true;
+        operator = PredicateLeaf.Operator.LESS_THAN_EQUALS;
+      } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) {
+        needSwap = true;
+        operator = PredicateLeaf.Operator.LESS_THAN;
+      }
+    }
+    if (needSwap) {
+      builder.startNot();
+    }
+
+    switch (operator) {
+      case IS_NULL:
+        builder.isNull(columnName, type);
+        break;
+      case EQUALS:
+        builder.equals(columnName, type, findLiteral(expression, type));
+        break;
+      case NULL_SAFE_EQUALS:
+        builder.nullSafeEquals(columnName, type, findLiteral(expression, type));
+        break;
+      case LESS_THAN:
+        builder.lessThan(columnName, type, findLiteral(expression, type));
+        break;
+      case LESS_THAN_EQUALS:
+        builder.lessThanEquals(columnName, type, findLiteral(expression, type));
+        break;
+      case IN:
+        builder.in(columnName, type,
+            getLiteralList(expression, type, variable + 1));
+        break;
+      case BETWEEN:
+        builder.between(columnName, type,
+            getLiteral(expression, type, variable + 1),
+            getLiteral(expression, type, variable + 2));
+        break;
+    }
+
+    if (needSwap) {
+      builder.end();
+    }
+  }
+
+  /**
+   * Find the variable in the expression.
+   * @param expr the expression to look in
+   * @return the index of the variable or -1 if there is not exactly one
+   *   variable.
+   */
+  private int findVariable(ExprNodeDesc expr) {
+    int result = -1;
+    List<ExprNodeDesc> children = expr.getChildren();
+    for(int i = 0; i < children.size(); ++i) {
+      ExprNodeDesc child = children.get(i);
+      if (child instanceof ExprNodeColumnDesc) {
+        // if we already found a variable, this isn't a sarg
+        if (result != -1) {
+          return -1;
+        } else {
+          result = i;
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Create a leaf expression when we aren't sure where the variable is
+   * located.
+   * @param operator the operator type that was found
+   * @param expression the expression to check
+   */
+  private void createLeaf(PredicateLeaf.Operator operator,
+                          ExprNodeGenericFuncDesc expression) {
+    createLeaf(operator, expression, findVariable(expression));
+  }
+
+  private void addChildren(ExprNodeGenericFuncDesc node) {
+    for(ExprNodeDesc child: node.getChildren()) {
+      parse(child);
+    }
+  }
+
+  /**
+   * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
+   * @param expression the Hive ExprNodeDesc
+   */
+  private void parse(ExprNodeDesc expression) {
+    // Most of the stuff we can handle are generic function descriptions, so
+    // handle the special cases.
+    if (expression.getClass() != ExprNodeGenericFuncDesc.class) {
+
+      // if it is a reference to a boolean column, covert it to a truth test.
+      if (expression instanceof ExprNodeColumnDesc) {
+        ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression;
+        if (columnDesc.getTypeString().equals("boolean")) {
+          builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN,
+              true);
+          return;
+        }
+      }
+
+      // otherwise, we don't know what to do so make it a maybe
+      builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
+      return;
+    }
+
+    // get the kind of expression
+    ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
+    Class<?> op = expr.getGenericUDF().getClass();
+
+    // handle the logical operators
+    if (op == GenericUDFOPOr.class) {
+      builder.startOr();
+      addChildren(expr);
+      builder.end();
+    } else if (op == GenericUDFOPAnd.class) {
+      builder.startAnd();
+      addChildren(expr);
+      builder.end();
+    } else if (op == GenericUDFOPNot.class) {
+      builder.startNot();
+      addChildren(expr);
+      builder.end();
+    } else if (op == GenericUDFOPEqual.class) {
+      createLeaf(PredicateLeaf.Operator.EQUALS, expr);
+    } else if (op == GenericUDFOPNotEqual.class) {
+      builder.startNot();
+      createLeaf(PredicateLeaf.Operator.EQUALS, expr);
+      builder.end();
+    } else if (op == GenericUDFOPEqualNS.class) {
+      createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr);
+    } else if (op == GenericUDFOPGreaterThan.class) {
+      builder.startNot();
+      createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
+      builder.end();
+    } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
+      builder.startNot();
+      createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
+      builder.end();
+    } else if (op == GenericUDFOPLessThan.class) {
+      createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
+    } else if (op == GenericUDFOPEqualOrLessThan.class) {
+      createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
+    } else if (op == GenericUDFIn.class) {
+      createLeaf(PredicateLeaf.Operator.IN, expr, 0);
+    } else if (op == GenericUDFBetween.class) {
+      createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
+    } else if (op == GenericUDFOPNull.class) {
+      createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
+    } else if (op == GenericUDFOPNotNull.class) {
+      builder.startNot();
+      createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
+      builder.end();
+
+      // otherwise, we didn't understand it, so mark it maybe
+    } else {
+      builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
+    }
+  }
+
+
+  public static final String SARG_PUSHDOWN = "sarg.pushdown";
+
+  public static SearchArgument create(ExprNodeGenericFuncDesc expression) {
+    return new ConvertAstToSearchArg(expression).buildSearchArgument();
+  }
+
+
+  public static SearchArgument create(String kryo) {
+    Input input = new Input(Base64.decodeBase64(kryo));
+    return new Kryo().readObject(input, SearchArgumentImpl.class);
+  }
+
+  public static SearchArgument createFromConf(Configuration conf) {
+    String sargString;
+    if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) {
+      return create(Utilities.deserializeExpression(sargString));
+    } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) {
+      return create(sargString);
+    }
+    return null;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
index c75e820..6ad927d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -30,27 +33,7 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc;
  * A factory for creating SearchArguments.
  */
 public class SearchArgumentFactory {
-  public static final String SARG_PUSHDOWN = "sarg.pushdown";
-
-  public static SearchArgument create(ExprNodeGenericFuncDesc expression) {
-    return new SearchArgumentImpl(expression);
-  }
-
   public static Builder newBuilder() {
-    return SearchArgumentImpl.newBuilder();
-  }
-
-  public static SearchArgument create(String kryo) {
-    return SearchArgumentImpl.fromKryo(kryo);
-  }
-
-  public static SearchArgument createFromConf(Configuration conf) {
-    String sargString = null;
-    if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) {
-      return create(Utilities.deserializeExpression(sargString));
-    } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) {
-      return create(sargString);
-    }
-    return null;
+    return new SearchArgumentImpl.BuilderImpl();
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
index 46f1e4e..1582a75 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -18,47 +18,20 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
-import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Deque;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 
 /**
@@ -74,6 +47,8 @@ final class SearchArgumentImpl implements SearchArgument {
     private final Object literal;
     private final List<Object> literalList;
 
+    // Used by kryo
+    @SuppressWarnings("unused")
     PredicateLeafImpl() {
       operator = null;
       type = null;
@@ -91,7 +66,24 @@ final class SearchArgumentImpl implements SearchArgument {
       this.type = type;
       this.columnName = columnName;
       this.literal = literal;
+      if (literal != null) {
+        if (literal.getClass() != type.getValueClass()) {
+          throw new IllegalArgumentException("Wrong value class " +
+              literal.getClass().getName() + " for " + type + "." + operator +
+              " leaf");
+        }
+      }
       this.literalList = literalList;
+      if (literalList != null) {
+        Class valueCls = type.getValueClass();
+        for(Object lit: literalList) {
+          if (lit != null && lit.getClass() != valueCls) {
+            throw new IllegalArgumentException("Wrong value class item " +
+                lit.getClass().getName() + " for " + type + "." + operator +
+                " leaf");
+          }
+        }
+      }
     }
 
     @Override
@@ -138,7 +130,7 @@ final class SearchArgumentImpl implements SearchArgument {
       } else if (literalList != null) {
         for(Object lit: literalList) {
           buffer.append(' ');
-          buffer.append(lit.toString());
+          buffer.append(lit == null ? "null" : lit.toString());
         }
       }
       buffer.append(')');
@@ -146,13 +138,9 @@ final class SearchArgumentImpl implements SearchArgument {
     }
 
     private static boolean isEqual(Object left, Object right) {
-      if (left == right) {
-        return true;
-      } else if (left == null || right == null) {
-        return false;
-      } else {
-        return left.equals(right);
-      }
+
+      return left == right ||
+          (left != null && right != null && left.equals(right));
     }
 
     @Override
@@ -182,286 +170,315 @@ final class SearchArgumentImpl implements SearchArgument {
     }
   }
 
-  static class ExpressionBuilder {
-    // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe
+
+  private final List<PredicateLeaf> leaves;
+  private final ExpressionTree expression;
+
+  SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
+    this.expression = expression;
+    this.leaves = leaves;
+  }
+
+  // Used by kyro
+  @SuppressWarnings("unused")
+  SearchArgumentImpl() {
+        leaves = null;
+        expression = null;
+  }
+
+  @Override
+  public List<PredicateLeaf> getLeaves() {
+    return leaves;
+  }
+
+  @Override
+  public TruthValue evaluate(TruthValue[] leaves) {
+    return expression == null ? TruthValue.YES : expression.evaluate(leaves);
+  }
+
+  @Override
+  public ExpressionTree getExpression() {
+    return expression;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+    for(int i=0; i < leaves.size(); ++i) {
+      buffer.append("leaf-");
+      buffer.append(i);
+      buffer.append(" = ");
+      buffer.append(leaves.get(i).toString());
+      buffer.append('\n');
+    }
+    buffer.append("expr = ");
+    buffer.append(expression);
+    return buffer.toString();
+  }
+
+  public String toKryo() {
+    Output out = new Output(4 * 1024, 10 * 1024 * 1024);
+    new Kryo().writeObject(out, this);
+    out.close();
+    return Base64.encodeBase64String(out.toBytes());
+  }
+
+  static class BuilderImpl implements Builder {
+
+    // max threshold for CNF conversion. having >8 elements in andList will be
+    // converted to maybe
     private static final int CNF_COMBINATIONS_THRESHOLD = 256;
-    private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();
 
-    /**
-     * Get the type of the given expression node.
-     * @param expr the expression to get the type of
-     * @return int, string, or float or null if we don't know the type
-     */
-    private static PredicateLeaf.Type getType(ExprNodeDesc expr) {
-      TypeInfo type = expr.getTypeInfo();
-      if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) {
-        switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
-          case BYTE:
-          case SHORT:
-          case INT:
-            return PredicateLeaf.Type.INTEGER;
-          case LONG:
-            return PredicateLeaf.Type.LONG;
-          case CHAR:
-          case VARCHAR:
-          case STRING:
-            return PredicateLeaf.Type.STRING;
-          case FLOAT:
-          case DOUBLE:
-            return PredicateLeaf.Type.FLOAT;
-          case DATE:
-            return PredicateLeaf.Type.DATE;
-          case TIMESTAMP:
-            return PredicateLeaf.Type.TIMESTAMP;
-          case DECIMAL:
-            return PredicateLeaf.Type.DECIMAL;
-          case BOOLEAN:
-        	return PredicateLeaf.Type.BOOLEAN;
-          default:
-        }
-      }
-      return null;
+    private final Deque<ExpressionTree> currentTree =
+        new ArrayDeque<ExpressionTree>();
+    private final Map<PredicateLeaf, Integer> leaves =
+        new HashMap<PredicateLeaf, Integer>();
+    private final ExpressionTree root =
+        new ExpressionTree(ExpressionTree.Operator.AND);
+    {
+      currentTree.add(root);
     }
 
-    /**
-     * Get the column name referenced in the expression. It must be at the top
-     * level of this expression and there must be exactly one column.
-     * @param expr the expression to look in
-     * @param variable the slot the variable is expected in
-     * @return the column name or null if there isn't exactly one column
-     */
-    private static String getColumnName(ExprNodeGenericFuncDesc expr,
-                                        int variable) {
-      List<ExprNodeDesc> children = expr.getChildren();
-      if (variable < 0 || variable >= children.size()) {
-        return null;
+    @Override
+    public Builder startOr() {
+      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR);
+      currentTree.getFirst().getChildren().add(node);
+      currentTree.addFirst(node);
+      return this;
+    }
+
+    @Override
+    public Builder startAnd() {
+      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND);
+      currentTree.getFirst().getChildren().add(node);
+      currentTree.addFirst(node);
+      return this;
+    }
+
+    @Override
+    public Builder startNot() {
+      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT);
+      currentTree.getFirst().getChildren().add(node);
+      currentTree.addFirst(node);
+      return this;
+    }
+
+    @Override
+    public Builder end() {
+      ExpressionTree current = currentTree.removeFirst();
+      if (current.getChildren().size() == 0) {
+        throw new IllegalArgumentException("Can't create expression " + root +
+            " with no children.");
       }
-      ExprNodeDesc child = children.get(variable);
-      if (child instanceof ExprNodeColumnDesc) {
-        return ((ExprNodeColumnDesc) child).getColumn();
+      if (current.getOperator() == ExpressionTree.Operator.NOT &&
+          current.getChildren().size() != 1) {
+        throw new IllegalArgumentException("Can't create not expression " +
+            current + " with more than 1 child.");
       }
-      return null;
+      return this;
     }
 
-    private static Object boxLiteral(ExprNodeConstantDesc lit) {
-      switch (getType(lit)) {
-        case INTEGER:
-          return ((Number) lit.getValue()).intValue();
-        case LONG:
-          return ((Number) lit.getValue()).longValue();
-        case STRING:
-          return StringUtils.stripEnd(lit.getValue().toString(), null);
-        case FLOAT:
-          return Double.parseDouble(lit.getValue().toString());
-        case DATE:
-        case TIMESTAMP:
-        case DECIMAL:
-        case BOOLEAN:
-          return lit;
-        default:
-          throw new IllegalArgumentException("Unknown literal " + getType(lit));
+    private int addLeaf(PredicateLeaf leaf) {
+      Integer result = leaves.get(leaf);
+      if (result == null) {
+        int id = leaves.size();
+        leaves.put(leaf, id);
+        return id;
+      } else {
+        return result;
       }
     }
 
-    private static Object getLiteral(ExprNodeGenericFuncDesc expr) {
-      Object result = null;
-      List<ExprNodeDesc> children = expr.getChildren();
-      if (children.size() != 2) {
-        return null;
-      }
-      for(ExprNodeDesc child: children) {
-        if (child instanceof ExprNodeConstantDesc) {
-          if (result != null) {
-            return null;
-          }
-          result = boxLiteral((ExprNodeConstantDesc) child);
-        }
+    @Override
+    public Builder lessThan(String column, PredicateLeaf.Type type,
+                            Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
-      return result;
+      return this;
     }
 
-    private static List<Object> getLiteralList(ExprNodeGenericFuncDesc expr,
-                                               int start) {
-      List<Object> result = new ArrayList<Object>();
-      List<ExprNodeDesc> children = expr.getChildren();
-      // ignore the first child, since it is the variable
-      for(ExprNodeDesc child: children.subList(start, children.size())) {
-        if (child instanceof ExprNodeConstantDesc) {
-          result.add(boxLiteral((ExprNodeConstantDesc) child));
-        } else {
-          // if we get some non-literals, we need to punt
-          return null;
-        }
+    @Override
+    public Builder lessThanEquals(String column, PredicateLeaf.Type type,
+                                  Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
-      return result;
+      return this;
     }
 
-    private ExpressionTree createLeaf(PredicateLeaf.Operator operator,
-                                      ExprNodeGenericFuncDesc expression,
-                                      List<PredicateLeaf> leafCache,
-                                      int variable) {
-      String columnName = getColumnName(expression, variable);
-      if (columnName == null) {
-        return new ExpressionTree(TruthValue.YES_NO_NULL);
-      }
-      PredicateLeaf.Type type = getType(expression.getChildren().get(variable));
-      if (type == null) {
-        return new ExpressionTree(TruthValue.YES_NO_NULL);
+    @Override
+    public Builder equals(String column, PredicateLeaf.Type type,
+                          Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
+      return this;
+    }
 
-      Object literal = null;
-      List<Object> literalList = null;
-      switch (operator) {
-        case IS_NULL:
-          break;
-        case IN:
-        case BETWEEN:
-          literalList = getLiteralList(expression, variable + 1);
-          if (literalList == null) {
-            return new ExpressionTree(TruthValue.YES_NO_NULL);
-          }
-          break;
-        default:
-          literal = getLiteral(expression);
-          if (literal == null) {
-            return new ExpressionTree(TruthValue.YES_NO_NULL);
-          }
-          break;
+    @Override
+    public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
+                                  Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
-      // if the variable was on the right, we need to swap things around
-      boolean needSwap = false;
-      if (variable != 0) {
-        if (operator == PredicateLeaf.Operator.LESS_THAN) {
-          needSwap = true;
-          operator = PredicateLeaf.Operator.LESS_THAN_EQUALS;
-        } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) {
-          needSwap = true;
-          operator = PredicateLeaf.Operator.LESS_THAN;
+      return this;
+    }
+
+    @Override
+    public Builder in(String column, PredicateLeaf.Type type,
+                      Object... literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column  == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        if (literal.length == 0) {
+          throw new IllegalArgumentException("Can't create in expression with "
+              + "no arguments");
         }
+        List<Object> argList = new ArrayList<Object>();
+        argList.addAll(Arrays.asList(literal));
+
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.IN,
+                type, column, null, argList);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
-      leafCache.add(new PredicateLeafImpl(operator, type, columnName,
-          literal, literalList));
-      ExpressionTree result = new ExpressionTree(leafCache.size() - 1);
-      if (needSwap) {
-        result = negate(result);
-      }
-      return result;
+      return this;
     }
 
-    /**
-     * Find the variable in the expression.
-     * @param expr the expression to look in
-     * @return the index of the variable or -1 if there is not exactly one
-     *   variable.
-     */
-    private int findVariable(ExprNodeDesc expr) {
-      int result = -1;
-      List<ExprNodeDesc> children = expr.getChildren();
-      for(int i = 0; i < children.size(); ++i) {
-        ExprNodeDesc child = children.get(i);
-        if (child instanceof ExprNodeColumnDesc) {
-          // if we already found a variable, this isn't a sarg
-          if (result != -1) {
-            return -1;
-          } else {
-            result = i;
-          }
-        }
+    @Override
+    public Builder isNull(String column, PredicateLeaf.Type type) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL,
+                type, column, null, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
-      return result;
+      return this;
     }
 
-    /**
-     * Create a leaf expression when we aren't sure where the variable is
-     * located.
-     * @param operator the operator type that was found
-     * @param expression the expression to check
-     * @param leafCache the list of leaves
-     * @return if the expression is a sarg, return it, otherwise null
-     */
-    private ExpressionTree createLeaf(PredicateLeaf.Operator operator,
-                                      ExprNodeGenericFuncDesc expression,
-                                      List<PredicateLeaf> leafCache) {
-      return createLeaf(operator, expression, leafCache,
-          findVariable(expression));
+    @Override
+    public Builder between(String column, PredicateLeaf.Type type, Object lower,
+                           Object upper) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || lower == null || upper == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        List<Object> argList = new ArrayList<Object>();
+        argList.add(lower);
+        argList.add(upper);
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
+                type, column, null, argList);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
     }
 
-    private ExpressionTree negate(ExpressionTree expr) {
-      ExpressionTree result = new ExpressionTree(ExpressionTree.Operator.NOT);
-      result.getChildren().add(expr);
-      return result;
+    @Override
+    public Builder literal(TruthValue truth) {
+      ExpressionTree parent = currentTree.getFirst();
+      parent.getChildren().add(new ExpressionTree(truth));
+      return this;
     }
 
-    private void addChildren(ExpressionTree result,
-                             ExprNodeGenericFuncDesc node,
-                             List<PredicateLeaf> leafCache) {
-      for(ExprNodeDesc child: node.getChildren()) {
-        result.getChildren().add(parse(child, leafCache));
+    /**
+     * Recursively explore the tree to find the leaves that are still reachable
+     * after optimizations.
+     * @param tree the node to check next
+     * @param next the next available leaf id
+     * @param leafReorder
+     * @return the next available leaf id
+     */
+    static int compactLeaves(ExpressionTree tree, int next, int[] leafReorder) {
+      if (tree.getOperator() == ExpressionTree.Operator.LEAF) {
+        int oldLeaf = tree.getLeaf();
+        if (leafReorder[oldLeaf] == -1) {
+          leafReorder[oldLeaf] = next++;
+        }
+      } else if (tree.getChildren() != null){
+        for(ExpressionTree child: tree.getChildren()) {
+          next = compactLeaves(child, next, leafReorder);
+        }
       }
+      return next;
     }
 
     /**
-     * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
-     * @param expression the Hive ExprNodeDesc
-     * @return the non-normalized ExpressionTree
+     * Rewrite expression tree to update the leaves.
+     * @param root the root of the tree to fix
+     * @param leafReorder a map from old leaf ids to new leaf ids
+     * @return the fixed root
      */
-    private ExpressionTree parse(ExprNodeDesc expression,
-                                 List<PredicateLeaf> leafCache) {
-      // if we don't know the expression, just assume maybe
-      if (expression.getClass() != ExprNodeGenericFuncDesc.class) {
-        return new ExpressionTree(TruthValue.YES_NO_NULL);
+    static ExpressionTree rewriteLeaves(ExpressionTree root,
+                                        int[] leafReorder) {
+      if (root.getOperator() == ExpressionTree.Operator.LEAF) {
+        return new ExpressionTree(leafReorder[root.getLeaf()]);
+      } else if (root.getChildren() != null){
+        List<ExpressionTree> children = root.getChildren();
+        for(int i=0; i < children.size(); ++i) {
+          children.set(i, rewriteLeaves(children.get(i), leafReorder));
+        }
       }
-      // get the kind of expression
-      ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
-      Class<?> op = expr.getGenericUDF().getClass();
-      ExpressionTree result;
-
-      // handle the logical operators
-      if (op == GenericUDFOPOr.class) {
-        result = new ExpressionTree(ExpressionTree.Operator.OR);
-        addChildren(result, expr, leafCache);
-      } else if (op == GenericUDFOPAnd.class) {
-        result = new ExpressionTree(ExpressionTree.Operator.AND);
-        addChildren(result, expr, leafCache);
-      } else if (op == GenericUDFOPNot.class) {
-        result = new ExpressionTree(ExpressionTree.Operator.NOT);
-        addChildren(result, expr, leafCache);
-      } else if (op == GenericUDFOPEqual.class) {
-        result = createLeaf(PredicateLeaf.Operator.EQUALS, expr, leafCache);
-      } else if (op == GenericUDFOPNotEqual.class) {
-        result = negate(createLeaf(PredicateLeaf.Operator.EQUALS, expr,
-            leafCache));
-      } else if (op == GenericUDFOPEqualNS.class) {
-        result = createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr,
-            leafCache);
-      } else if (op == GenericUDFOPGreaterThan.class) {
-        result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS,
-            expr, leafCache));
-      } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
-        result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN, expr,
-            leafCache));
-      } else if (op == GenericUDFOPLessThan.class) {
-        result = createLeaf(PredicateLeaf.Operator.LESS_THAN, expr, leafCache);
-      } else if (op == GenericUDFOPEqualOrLessThan.class) {
-        result = createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr,
-            leafCache);
-      } else if (op == GenericUDFIn.class) {
-        result = createLeaf(PredicateLeaf.Operator.IN, expr, leafCache, 0);
-      } else if (op == GenericUDFBetween.class) {
-        result = createLeaf(PredicateLeaf.Operator.BETWEEN, expr, leafCache,
-            1);
-      } else if (op == GenericUDFOPNull.class) {
-        result = createLeaf(PredicateLeaf.Operator.IS_NULL, expr, leafCache,
-            0);
-      } else if (op == GenericUDFOPNotNull.class) {
-        result = negate(createLeaf(PredicateLeaf.Operator.IS_NULL, expr,
-            leafCache, 0));
+      return root;
+    }
 
-      // otherwise, we didn't understand it, so mark it maybe
-      } else {
-        result = new ExpressionTree(TruthValue.YES_NO_NULL);
+    @Override
+    public SearchArgument build() {
+      if (currentTree.size() != 1) {
+        throw new IllegalArgumentException("Failed to end " +
+            currentTree.size() + " operations.");
       }
-      return result;
+      ExpressionTree optimized = pushDownNot(root);
+      optimized = foldMaybe(optimized);
+      optimized = flatten(optimized);
+      optimized = convertToCNF(optimized);
+      optimized = flatten(optimized);
+      int leafReorder[] = new int[leaves.size()];
+      Arrays.fill(leafReorder, -1);
+      int newLeafCount = compactLeaves(optimized, 0, leafReorder);
+      optimized = rewriteLeaves(optimized, leafReorder);
+      ArrayList<PredicateLeaf> leafList = new ArrayList<>(newLeafCount);
+      // expand list to correct size
+      for(int i=0; i < newLeafCount; ++i) {
+        leafList.add(null);
+      }
+      // build the new list
+      for(Map.Entry<PredicateLeaf, Integer> elem: leaves.entrySet()) {
+        int newLoc = leafReorder[elem.getValue()];
+        if (newLoc != -1) {
+          leafList.set(newLoc, elem.getKey());
+        }
+      }
+      return new SearchArgumentImpl(optimized, leafList);
     }
 
     /**
@@ -528,7 +545,7 @@ final class SearchArgumentImpl implements SearchArgument {
                 return child;
               default:
                 throw new IllegalStateException("Got a maybe as child of " +
-                  expr);
+                    expr);
             }
           } else {
             expr.getChildren().set(i, child);
@@ -542,6 +559,45 @@ final class SearchArgumentImpl implements SearchArgument {
     }
 
     /**
+     * Converts multi-level ands and ors into single level ones.
+     * @param root the expression to flatten
+     * @return the flattened expression, which will always be root with
+     *   potentially modified children.
+     */
+    static ExpressionTree flatten(ExpressionTree root) {
+      if (root.getChildren() != null) {
+        // iterate through the index, so that if we add more children,
+        // they don't get re-visited
+        for(int i=0; i < root.getChildren().size(); ++i) {
+          ExpressionTree child = flatten(root.getChildren().get(i));
+          // do we need to flatten?
+          if (child.getOperator() == root.getOperator() &&
+              child.getOperator() != ExpressionTree.Operator.NOT) {
+            boolean first = true;
+            for(ExpressionTree grandkid: child.getChildren()) {
+              // for the first grandkid replace the original parent
+              if (first) {
+                first = false;
+                root.getChildren().set(i, grandkid);
+              } else {
+                root.getChildren().add(++i, grandkid);
+              }
+            }
+          } else {
+            root.getChildren().set(i, child);
+          }
+        }
+        // if we have a singleton AND or OR, just return the child
+        if ((root.getOperator() == ExpressionTree.Operator.OR ||
+            root.getOperator() == ExpressionTree.Operator.AND) &&
+            root.getChildren().size() == 1) {
+          return root.getChildren().get(0);
+        }
+      }
+      return root;
+    }
+
+    /**
      * Generate all combinations of items on the andList. For each item on the
      * andList, it generates all combinations of one child from each and
      * expression. Thus, (and a b) (and c d) will be expanded to: (or a c)
@@ -554,7 +610,7 @@ final class SearchArgumentImpl implements SearchArgument {
     private static void generateAllCombinations(List<ExpressionTree> result,
                                                 List<ExpressionTree> andList,
                                                 List<ExpressionTree> nonAndList
-                                               ) {
+    ) {
       List<ExpressionTree> kids = andList.get(0).getChildren();
       if (result.isEmpty()) {
         for(ExpressionTree kid: kids) {
@@ -637,391 +693,5 @@ final class SearchArgumentImpl implements SearchArgument {
       return true;
     }
 
-    /**
-     * Converts multi-level ands and ors into single level ones.
-     * @param root the expression to flatten
-     * @return the flattened expression, which will always be root with
-     *   potentially modified children.
-     */
-    static ExpressionTree flatten(ExpressionTree root) {
-      if (root.getChildren() != null) {
-        // iterate through the index, so that if we add more children,
-        // they don't get re-visited
-        for(int i=0; i < root.getChildren().size(); ++i) {
-          ExpressionTree child = flatten(root.getChildren().get(i));
-          // do we need to flatten?
-          if (child.getOperator() == root.getOperator() &&
-              child.getOperator() != ExpressionTree.Operator.NOT) {
-            boolean first = true;
-            for(ExpressionTree grandkid: child.getChildren()) {
-              // for the first grandkid replace the original parent
-              if (first) {
-                first = false;
-                root.getChildren().set(i, grandkid);
-              } else {
-                root.getChildren().add(++i, grandkid);
-              }
-            }
-          } else {
-            root.getChildren().set(i, child);
-          }
-        }
-        // if we have a singleton AND or OR, just return the child
-        if ((root.getOperator() == ExpressionTree.Operator.OR ||
-             root.getOperator() == ExpressionTree.Operator.AND) &&
-            root.getChildren().size() == 1) {
-          return root.getChildren().get(0);
-        }
-      }
-      return root;
-    }
-
-    /**
-     * Iterates through the expression, finding all of the leaves. It creates
-     * the leaves list with each unique leaf that is found in the expression.
-     * The expression is updated with the new leaf ids for each leaf.
-     * @param expr the expression to find the leaves in
-     * @param leafCache the list of all of the leaves
-     * @param lookup a map that is used to uniquify the leaves
-     * @return The potentially modified expression
-     */
-    private ExpressionTree buildLeafList(ExpressionTree expr,
-                                         List<PredicateLeaf> leafCache,
-                                         Map<PredicateLeaf,
-                                             ExpressionTree> lookup) {
-      if (expr.getChildren() != null) {
-        for(int i=0; i < expr.getChildren().size(); ++i) {
-          expr.getChildren().set(i, buildLeafList(expr.getChildren().get(i),
-              leafCache, lookup));
-        }
-      } else if (expr.getOperator() == ExpressionTree.Operator.LEAF) {
-        PredicateLeaf leaf = leafCache.get(expr.getLeaf());
-        ExpressionTree val = lookup.get(leaf);
-        if (val == null) {
-          val = new ExpressionTree(leaves.size());
-          lookup.put(leaf, val);
-          leaves.add(leaf);
-        }
-        return val;
-      }
-      return expr;
-    }
-
-    /**
-     * Builds the expression and leaf list from the original predicate.
-     * @param expression the expression to translate
-     * @return The normalized expression.
-     */
-    ExpressionTree expression(ExprNodeGenericFuncDesc expression) {
-      List<PredicateLeaf> leafCache = new ArrayList<PredicateLeaf>();
-      ExpressionTree expr = parse(expression, leafCache);
-      return expression(expr, leafCache);
-    }
-
-    /**
-     * Builds the expression and optimized leaf list from a non-normalized
-     * expression. Sets the leaves field with the unique leaves.
-     * @param expr non-normalized expression
-     * @param leaves non-unique leaves
-     * @return the normalized expression
-     */
-    ExpressionTree expression(ExpressionTree expr,
-                              List<PredicateLeaf> leaves) {
-      expr = pushDownNot(expr);
-      expr = foldMaybe(expr);
-      expr = flatten(expr);
-      expr = convertToCNF(expr);
-      expr = flatten(expr);
-      expr =  buildLeafList(expr, leaves,
-          new HashMap<PredicateLeaf, ExpressionTree>());
-      return expr;
-    }
-
-    List<PredicateLeaf> getLeaves() {
-      return leaves;
-    }
-  }
-
-  private final List<PredicateLeaf> leaves;
-  private final ExpressionTree expression;
-
-  SearchArgumentImpl(ExprNodeGenericFuncDesc expr) {
-    if (expr == null) {
-      leaves = new ArrayList<PredicateLeaf>();
-      expression = null;
-    } else {
-      ExpressionBuilder builder = new ExpressionBuilder();
-      expression = builder.expression(expr);
-      leaves = builder.getLeaves();
-    }
-  }
-
-  SearchArgumentImpl() {
-    leaves = null;
-    expression = null;
-  }
-
-  SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
-    this.expression = expression;
-    this.leaves = leaves;
-  }
-
-  @Override
-  public List<PredicateLeaf> getLeaves() {
-    return leaves;
-  }
-
-  @Override
-  public TruthValue evaluate(TruthValue[] leaves) {
-    return expression == null ? TruthValue.YES : expression.evaluate(leaves);
-  }
-
-  @Override
-  public ExpressionTree getExpression() {
-    return expression;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder buffer = new StringBuilder();
-    for(int i=0; i < leaves.size(); ++i) {
-      buffer.append("leaf-");
-      buffer.append(i);
-      buffer.append(" = ");
-      buffer.append(leaves.get(i).toString());
-      buffer.append('\n');
-    }
-    buffer.append("expr = ");
-    buffer.append(expression);
-    return buffer.toString();
-  }
-
-  public String toKryo() {
-    Output out = new Output(4 * 1024, 10 * 1024 * 1024);
-    new Kryo().writeObject(out, this);
-    out.close();
-    return Base64.encodeBase64String(out.toBytes());
-  }
-
-  static SearchArgument fromKryo(String value) {
-    Input input = new Input(Base64.decodeBase64(value));
-    return new Kryo().readObject(input, SearchArgumentImpl.class);
-  }
-
-  private static class BuilderImpl implements Builder {
-    private final Deque<ExpressionTree> currentTree =
-        new ArrayDeque<ExpressionTree>();
-    private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();
-    private ExpressionTree root = null;
-
-    @Override
-    public Builder startOr() {
-      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR);
-      if (currentTree.size() != 0) {
-        ExpressionTree parent = currentTree.getFirst();
-        parent.getChildren().add(node);
-      }
-      currentTree.addFirst(node);
-      return this;
-    }
-
-    @Override
-    public Builder startAnd() {
-      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND);
-      if (currentTree.size() != 0) {
-        ExpressionTree parent = currentTree.getFirst();
-        parent.getChildren().add(node);
-      }
-      currentTree.addFirst(node);
-      return this;
-    }
-
-    @Override
-    public Builder startNot() {
-      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT);
-      if (currentTree.size() != 0) {
-        ExpressionTree parent = currentTree.getFirst();
-        parent.getChildren().add(node);
-      }
-      currentTree.addFirst(node);
-      return this;
-    }
-
-    @Override
-    public Builder end() {
-      root = currentTree.removeFirst();
-      if (root.getChildren().size() == 0) {
-        throw new IllegalArgumentException("Can't create expression " + root +
-            " with no children.");
-      }
-      if (root.getOperator() == ExpressionTree.Operator.NOT &&
-          root.getChildren().size() != 1) {
-        throw new IllegalArgumentException("Can't create not expression " +
-            root + " with more than 1 child.");
-      }
-      return this;
-    }
-
-    private static Object boxLiteral(Object literal) {
-      if (literal instanceof String ||
-          literal instanceof Long ||
-          literal instanceof Double ||
-          literal instanceof DateWritable ||
-          literal instanceof Timestamp ||
-          literal instanceof HiveDecimal ||
-          literal instanceof BigDecimal ||
-          literal instanceof Boolean) {
-        return literal;
-      } else if (literal instanceof HiveChar ||
-          literal instanceof HiveVarchar) {
-        return StringUtils.stripEnd(literal.toString(), null);
-      } else if (literal instanceof Byte ||
-          literal instanceof Short ||
-          literal instanceof Integer) {
-        return ((Number) literal).longValue();
-      } else if (literal instanceof Float) {
-        // to avoid change in precision when upcasting float to double
-        // we convert the literal to string and parse it as double. (HIVE-8460)
-        return Double.parseDouble(literal.toString());
-      } else {
-        throw new IllegalArgumentException("Unknown type for literal " +
-            literal);
-      }
-    }
-
-    private static PredicateLeaf.Type getType(Object literal) {
-      if (literal instanceof Byte ||
-          literal instanceof Short ||
-          literal instanceof Integer) {
-        return PredicateLeaf.Type.INTEGER;
-      } else if(literal instanceof Long){
-        return PredicateLeaf.Type.LONG;
-      }else if (literal instanceof HiveChar ||
-          literal instanceof HiveVarchar ||
-          literal instanceof String) {
-        return PredicateLeaf.Type.STRING;
-      } else if (literal instanceof Float ||
-          literal instanceof Double) {
-        return PredicateLeaf.Type.FLOAT;
-      } else if (literal instanceof DateWritable) {
-        return PredicateLeaf.Type.DATE;
-      } else if (literal instanceof Timestamp) {
-        return PredicateLeaf.Type.TIMESTAMP;
-      }else if (literal instanceof HiveDecimal ||
-          literal instanceof BigDecimal) {
-        return PredicateLeaf.Type.DECIMAL;
-      } else if (literal instanceof Boolean) {
-        return PredicateLeaf.Type.BOOLEAN;
-      }
-      throw new IllegalArgumentException("Unknown type for literal " + literal);
-    }
-
-    @Override
-    public Builder lessThan(String column, Object literal) {
-      ExpressionTree parent = currentTree.getFirst();
-      Object box = boxLiteral(literal);
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN,
-              getType(box), column, box, null);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public Builder lessThanEquals(String column, Object literal) {
-      ExpressionTree parent = currentTree.getFirst();
-      Object box = boxLiteral(literal);
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS,
-              getType(box), column, box, null);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public Builder equals(String column, Object literal) {
-      ExpressionTree parent = currentTree.getFirst();
-      Object box = boxLiteral(literal);
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS,
-              getType(box), column, box, null);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public Builder nullSafeEquals(String column, Object literal) {
-      ExpressionTree parent = currentTree.getFirst();
-      Object box = boxLiteral(literal);
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-              getType(box), column, box, null);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public Builder in(String column, Object... literal) {
-      ExpressionTree parent = currentTree.getFirst();
-      if (literal.length == 0) {
-        throw new IllegalArgumentException("Can't create in expression with "
-            + "no arguments");
-      }
-      List<Object> argList = new ArrayList<Object>();
-      for(Object lit: literal){
-        argList.add(boxLiteral(lit));
-      }
-
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.IN,
-              getType(argList.get(0)), column, null, argList);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public Builder isNull(String column) {
-      ExpressionTree parent = currentTree.getFirst();
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL,
-              PredicateLeaf.Type.STRING, column, null, null);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public Builder between(String column, Object lower, Object upper) {
-      ExpressionTree parent = currentTree.getFirst();
-      List<Object> argList = new ArrayList<Object>();
-      argList.add(boxLiteral(lower));
-      argList.add(boxLiteral(upper));
-      PredicateLeaf leaf =
-          new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
-              getType(argList.get(0)), column, null, argList);
-      leaves.add(leaf);
-      parent.getChildren().add(new ExpressionTree(leaves.size() - 1));
-      return this;
-    }
-
-    @Override
-    public SearchArgument build() {
-      if (currentTree.size() != 0) {
-        throw new IllegalArgumentException("Failed to end " +
-            currentTree.size() + " operations.");
-      }
-      ExpressionBuilder internal = new ExpressionBuilder();
-      ExpressionTree normalized = internal.expression(root, leaves);
-      return new SearchArgumentImpl(normalized, internal.getLeaves());
-    }
-  }
-
-  public static Builder newBuilder() {
-    return new BuilderImpl();
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index e96ab2a..e40e1d2 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy;
+import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
@@ -1746,8 +1747,8 @@ public class TestInputOutputFormat {
     types.add(builder.build());
     types.add(builder.build());
     SearchArgument isNull = SearchArgumentFactory.newBuilder()
-        .startAnd().isNull("cost").end().build();
-    conf.set(SearchArgumentFactory.SARG_PUSHDOWN, isNull.toKryo());
+        .startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build();
+    conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, isNull.toKryo());
     conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
         "url,cost");
     options.include(new boolean[]{true, true, false, true, false});
@@ -1791,7 +1792,7 @@ public class TestInputOutputFormat {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-            .lessThan("z", new Integer(0))
+            .lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0))
             .end()
             .build();
     conf.set("sarg.pushdown", sarg.toKryo());
@@ -1833,7 +1834,7 @@ public class TestInputOutputFormat {
     SearchArgument sarg =
         SearchArgumentFactory.newBuilder()
             .startAnd()
-            .lessThan("z", new String("foo"))
+            .lessThan("z", PredicateLeaf.Type.STRING, new String("foo"))
             .end()
             .build();
     conf.set("sarg.pushdown", sarg.toKryo());

http://git-wip-us.apache.org/repos/asf/hive/blob/c178a6e9/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 255565e..4480d22 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
@@ -1922,9 +1923,9 @@ public class TestOrcFile {
     SearchArgument sarg = SearchArgumentFactory.newBuilder()
         .startAnd()
           .startNot()
-             .lessThan("int1", 300000)
+             .lessThan("int1", PredicateLeaf.Type.INTEGER, 300000)
           .end()
-          .lessThan("int1", 600000)
+          .lessThan("int1", PredicateLeaf.Type.INTEGER, 600000)
         .end()
         .build();
     RecordReader rows = reader.rowsOptions(new Reader.Options()
@@ -1945,7 +1946,7 @@ public class TestOrcFile {
     // look through the file with no rows selected
     sarg = SearchArgumentFactory.newBuilder()
         .startAnd()
-          .lessThan("int1", 0)
+          .lessThan("int1", PredicateLeaf.Type.INTEGER, 0)
         .end()
         .build();
     rows = reader.rowsOptions(new Reader.Options()
@@ -1958,9 +1959,9 @@ public class TestOrcFile {
     // select first 100 and last 100 rows
     sarg = SearchArgumentFactory.newBuilder()
         .startOr()
-          .lessThan("int1", 300 * 100)
+          .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 100)
           .startNot()
-            .lessThan("int1", 300 * 3400)
+            .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 3400)
           .end()
         .end()
         .build();