Posted to commits@hive.apache.org by se...@apache.org on 2015/09/16 03:18:51 UTC

hive git commit: HIVE-11705 : refactor SARG stripe filtering for ORC into a separate method (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

Repository: hive
Updated Branches:
  refs/heads/master bc62a46d1 -> ba0b33c10


HIVE-11705 : refactor SARG stripe filtering for ORC into a separate method (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ba0b33c1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ba0b33c1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ba0b33c1

Branch: refs/heads/master
Commit: ba0b33c1025625b92bd669da60d2789f315e27f7
Parents: bc62a46
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Sep 15 18:09:54 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Sep 15 18:09:54 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 151 ++++++++++++-------
 .../apache/hadoop/hive/ql/io/orc/OrcSerde.java  |   1 +
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |   4 +-
 .../hive/ql/io/parquet/ProjectionPusher.java    |   3 +-
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java  |   4 +
 .../ql/optimizer/ColumnPrunerProcFactory.java   |   3 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   2 +
 .../hive/serde2/ColumnProjectionUtils.java      |  22 +++
 .../hive/ql/io/sarg/SearchArgumentFactory.java  |   5 +-
 .../hive/ql/io/sarg/SearchArgumentImpl.java     |   7 +-
 10 files changed, 142 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index cf8694e..2500fb6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -58,11 +58,13 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -265,8 +267,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     OrcProto.Type root = types.get(rootColumn);
     for(int i=0; i < root.getSubtypesCount(); ++i) {
       if (included.contains(i)) {
-        includeColumnRecursive(types, result, root.getSubtypes(i),
-            rootColumn);
+        includeColumnRecursive(types, result, root.getSubtypes(i), rootColumn);
       }
     }
     return result;
@@ -292,6 +293,13 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     int rootColumn = getRootColumn(isOriginal);
     String[] columnNames = new String[types.size() - rootColumn];
     int i = 0;
+    // The way this works is as follows. originalColumnNames is the equivalent of getNeededColumns
+    // from TSOP. They are assumed to be in the same order as the columns in the ORC file, AND they
+    // are assumed to be equivalent to the columns in includedColumns (because it was generated
+    // from the same column list at some point in the past), minus the subtype columns. Therefore,
+    // when we go through all the top-level ORC file columns that are included, in order, they
+    // match originalColumnNames. This way, we do not depend on names stored inside ORC for SARG
+    // leaf column name resolution (see mapSargColumns method).
     for(int columnId: types.get(rootColumn).getSubtypesList()) {
       if (includedColumns == null || includedColumns[columnId - rootColumn]) {
         // this is guaranteed to be positive because types only have children
@@ -306,8 +314,8 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
                                 List<OrcProto.Type> types,
                                 Configuration conf,
                                 boolean isOriginal) {
-    String columnNamesString = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
-    if (columnNamesString == null) {
+    String neededColumnNames = getNeededColumnNamesString(conf);
+    if (neededColumnNames == null) {
       LOG.debug("No ORC pushdown predicate - no column names");
       options.searchArgument(null, null);
       return;
@@ -321,9 +329,39 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
 
     LOG.info("ORC pushdown predicate: " + sarg);
     options.searchArgument(sarg, getSargColumnNames(
-        columnNamesString.split(","), types, options.getInclude(), isOriginal));
+        neededColumnNames.split(","), types, options.getInclude(), isOriginal));
   }
 
+  static boolean canCreateSargFromConf(Configuration conf) {
+    if (getNeededColumnNamesString(conf) == null) {
+      LOG.debug("No ORC pushdown predicate - no column names");
+      return false;
+    }
+    if (!ConvertAstToSearchArg.canCreateFromConf(conf)) {
+      LOG.debug("No ORC pushdown predicate");
+      return false;
+    }
+    return true;
+  }
+
+  private static String[] extractNeededColNames(
+      List<OrcProto.Type> types, Configuration conf, boolean[] include, boolean isOriginal) {
+    return extractNeededColNames(types, getNeededColumnNamesString(conf), include, isOriginal);
+  }
+
+  private static String[] extractNeededColNames(
+      List<OrcProto.Type> types, String columnNamesString, boolean[] include, boolean isOriginal) {
+    return getSargColumnNames(columnNamesString.split(","), types, include, isOriginal);
+  }
+
+  private static String getNeededColumnNamesString(Configuration conf) {
+    return conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
+  }
+
+  private static String getSargColumnIDsString(Configuration conf) {
+    return conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true) ? null
+        : conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+  }
   @Override
   public boolean validateInput(FileSystem fs, HiveConf conf,
                                ArrayList<FileStatus> files
@@ -863,34 +901,11 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
 
       // we can't eliminate stripes if there are deltas because the
       // deltas may change the rows making them match the predicate.
-      if (deltas.isEmpty()) {
-        Reader.Options options = new Reader.Options();
-        options.include(includedCols);
-        setSearchArgument(options, types, context.conf, isOriginal);
-        // only do split pruning if HIVE-8732 has been fixed in the writer
-        if (options.getSearchArgument() != null &&
-            writerVersion != OrcFile.WriterVersion.ORIGINAL) {
-          SearchArgument sarg = options.getSearchArgument();
-          List<PredicateLeaf> sargLeaves = sarg.getLeaves();
-          List<StripeStatistics> stripeStats = metadata.getStripeStatistics();
-          int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves,
-              options.getColumnNames(), getRootColumn(isOriginal));
-
-          if (stripeStats != null) {
-            // eliminate stripes that doesn't satisfy the predicate condition
-            includeStripe = new boolean[stripes.size()];
-            for (int i = 0; i < stripes.size(); ++i) {
-              includeStripe[i] = (i >= stripeStats.size()) ||
-                  isStripeSatisfyPredicate(stripeStats.get(i), sarg,
-                      filterColumns);
-              if (isDebugEnabled && !includeStripe[i]) {
-                LOG.debug("Eliminating ORC stripe-" + i + " of file '" +
-                    file.getPath() + "'  as it did not satisfy " +
-                    "predicate condition.");
-              }
-            }
-          }
-        }
+      if (deltas.isEmpty() && canCreateSargFromConf(context.conf)) {
+        SearchArgument sarg = ConvertAstToSearchArg.createFromConf(context.conf);
+        String[] sargColNames = extractNeededColNames(types, context.conf, includedCols, isOriginal);
+        includeStripe = pickStripes(sarg, sargColNames, writerVersion, isOriginal,
+            metadata.getStripeStatistics(), stripes.size(), file.getPath());
       }
 
       // if we didn't have predicate pushdown, read everything
@@ -990,28 +1005,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
       }
       return orcReader.getRawDataSizeFromColIndices(internalColIds);
     }
-
-    private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
-                                             SearchArgument sarg,
-                                             int[] filterColumns) {
-      List<PredicateLeaf> predLeaves = sarg.getLeaves();
-      TruthValue[] truthValues = new TruthValue[predLeaves.size()];
-      for (int pred = 0; pred < truthValues.length; pred++) {
-        if (filterColumns[pred] != -1) {
-
-          // column statistics at index 0 contains only the number of rows
-          ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
-          truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
-        } else {
-
-          // parition column case.
-          // partition filter will be evaluated by partition pruner so
-          // we will not evaluate partition filter here.
-          truthValues[pred] = TruthValue.YES_NO_NULL;
-        }
-      }
-      return sarg.evaluate(truthValues).isNeeded();
-    }
   }
 
   static List<OrcSplit> generateSplitsInfo(Configuration conf)
@@ -1353,6 +1346,54 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
         directory);
   }
 
+  private static boolean[] pickStripes(SearchArgument sarg, String[] sargColNames,
+      WriterVersion writerVersion, boolean isOriginal, List<StripeStatistics> stripeStats,
+      int stripeCount, Path filePath) {
+    LOG.info("ORC pushdown predicate: " + sarg);
+    if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
+      return null; // only do split pruning if HIVE-8732 has been fixed in the writer
+    }
+    // eliminate stripes that don't satisfy the predicate condition
+    List<PredicateLeaf> sargLeaves = sarg.getLeaves();
+    int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves,
+        sargColNames, getRootColumn(isOriginal));
+    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath);
+  }
+
+  private static boolean[] pickStripesInternal(SearchArgument sarg, int[] filterColumns,
+      List<StripeStatistics> stripeStats, int stripeCount, Path filePath) {
+    boolean[] includeStripe = new boolean[stripeCount];
+    for (int i = 0; i < includeStripe.length; ++i) {
+      includeStripe[i] = (i >= stripeStats.size()) ||
+          isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns);
+      if (isDebugEnabled && !includeStripe[i]) {
+        LOG.debug("Eliminating ORC stripe-" + i + " of file '" + filePath
+            + "'  as it did not satisfy predicate condition.");
+      }
+    }
+    return includeStripe;
+  }
+
+  private static boolean isStripeSatisfyPredicate(
+      StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) {
+    List<PredicateLeaf> predLeaves = sarg.getLeaves();
+    TruthValue[] truthValues = new TruthValue[predLeaves.size()];
+    for (int pred = 0; pred < truthValues.length; pred++) {
+      if (filterColumns[pred] != -1) {
+
+        // column statistics at index 0 contains only the number of rows
+        ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+        truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+      } else {
+
+        // partition column case.
+        // partition filter will be evaluated by partition pruner so
+        // we will not evaluate partition filter here.
+        truthValues[pred] = TruthValue.YES_NO_NULL;
+      }
+    }
+    return sarg.evaluate(truthValues).isNeeded();
+  }
 
   @VisibleForTesting
   static SplitStrategy determineSplitStrategy(Context context, FileSystem fs, Path dir,

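The pruning logic above boils down to: for each stripe, evaluate every predicate leaf
against that stripe's column statistics to get a TruthValue, then ask the SearchArgument
whether the stripe can possibly contain matching rows. The sketch below is not part of
this commit; it mirrors pickStripesInternal and isStripeSatisfyPredicate, with a
hypothetical LeafEvaluator standing in for RecordReaderImpl.evaluatePredicate:

    import java.util.List;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

    class StripePruningSketch {
      /** Hypothetical stand-in for per-leaf evaluation against stripe statistics. */
      interface LeafEvaluator {
        TruthValue evalLeaf(int stripe, PredicateLeaf leaf, int orcColumn);
      }

      /** Returns per-stripe include flags; stripes without statistics are always kept. */
      static boolean[] pickStripes(SearchArgument sarg, int[] filterColumns,
          int stripeCount, int stripesWithStats, LeafEvaluator evaluator) {
        List<PredicateLeaf> leaves = sarg.getLeaves();
        boolean[] include = new boolean[stripeCount];
        for (int i = 0; i < stripeCount; ++i) {
          if (i >= stripesWithStats) {
            include[i] = true; // no statistics for this stripe, so it cannot be eliminated
            continue;
          }
          TruthValue[] truthValues = new TruthValue[leaves.size()];
          for (int pred = 0; pred < truthValues.length; ++pred) {
            // Leaves that resolve to no ORC column (filterColumns[pred] == -1, e.g.
            // partition columns) cannot eliminate a stripe, so they get YES_NO_NULL.
            truthValues[pred] = (filterColumns[pred] == -1) ? TruthValue.YES_NO_NULL
                : evaluator.evalLeaf(i, leaves.get(pred), filterColumns[pred]);
          }
          include[i] = sarg.evaluate(truthValues).isNeeded();
        }
        return include;
      }
    }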
http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
index 8beff4b..595f3b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
@@ -108,6 +108,7 @@ public class OrcSerde implements SerDe, VectorizedSerde {
     ArrayList<TypeInfo> fieldTypes =
         TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
     StructTypeInfo rootType = new StructTypeInfo();
+    // The source column names for ORC serde that will be used in the schema.
     rootType.setAllStructFieldNames(columnNames);
     rootType.setAllStructFieldTypeInfos(fieldTypes);
     inspector = OrcStruct.createObjectInspector(rootType);

http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index fcb3746..ba304ba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -137,7 +137,7 @@ class RecordReaderImpl implements RecordReader {
    *                   result
    * @return an array mapping the sarg leaves to concrete column numbers
    */
-  public static int[] mapSargColumns(List<PredicateLeaf> sargLeaves,
+  public static int[] mapSargColumnsToOrcInternalColIdx(List<PredicateLeaf> sargLeaves,
                              String[] columnNames,
                              int rootColumn) {
     int[] result = new int[sargLeaves.size()];
@@ -693,7 +693,7 @@ class RecordReaderImpl implements RecordReader {
         List<OrcProto.Type> types, int includedCount) {
       this.sarg = sarg;
       sargLeaves = sarg.getLeaves();
-      filterColumns = mapSargColumns(sargLeaves, columnNames, 0);
+      filterColumns = mapSargColumnsToOrcInternalColIdx(sargLeaves, columnNames, 0);
       this.rowIndexStride = rowIndexStride;
       // included will not be null, row options will fill the array with trues if null
       sargColumns = new boolean[includedCount];

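The renamed mapSargColumnsToOrcInternalColIdx keeps the same contract as the old
mapSargColumns: each SARG leaf is resolved to a concrete ORC column index, or -1 when the
leaf's column is not among the given names (the partition-column case handled in
OrcInputFormat). A rough sketch of that contract, not the actual implementation, assuming
simple name matching:

    import java.util.List;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;

    class SargColumnMappingSketch {
      /** Maps each SARG leaf to rootColumn + the position of its column name, or -1 if absent. */
      static int[] mapLeavesToColumns(List<PredicateLeaf> sargLeaves,
          String[] columnNames, int rootColumn) {
        int[] result = new int[sargLeaves.size()];
        for (int i = 0; i < result.length; ++i) {
          result[i] = -1; // leaves that do not resolve (e.g. partition columns) stay at -1
          String wanted = sargLeaves.get(i).getColumnName();
          for (int col = 0; col < columnNames.length; ++col) {
            if (columnNames[col] != null && columnNames[col].equals(wanted)) {
              result[i] = col + rootColumn;
              break;
            }
          }
        }
        return result;
      }
    }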
http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
index 4480600..4848efd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
@@ -65,6 +65,7 @@ public class ProjectionPusher {
     }
   }
 
+  @Deprecated  // Uses deprecated methods on ColumnProjectionUtils
   private void pushProjectionsAndFilters(final JobConf jobConf,
       final String splitPath, final String splitPathWithNoSchema) {
 
@@ -136,7 +137,7 @@ public class ProjectionPusher {
         filterExprSerialized);
   }
 
-
+  @Deprecated // Uses deprecated methods on ColumnProjectionUtils
   public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path)
       throws IOException {
     updateMrWork(jobConf);  // TODO: refactor this in HIVE-6366

http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
index e034650..690b8c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
@@ -433,4 +433,8 @@ public class ConvertAstToSearchArg {
     return null;
   }
 
+  public static boolean canCreateFromConf(Configuration conf) {
+    return conf.get(TableScanDesc.FILTER_EXPR_CONF_STR) != null || conf.get(SARG_PUSHDOWN) != null;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index 2dc15f9..b104a7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -488,6 +488,9 @@ public final class ColumnPrunerProcFactory {
     }
   }
 
+  /** Sets up needed columns for TSOP. Mainly, transfers column names from the input
+   * RowSchema, as well as the needed virtual columns, into TableScanDesc.
+   */
   public static void setupNeededColumns(TableScanOperator scanOp, RowSchema inputRS,
       List<String> cols) throws SemanticException {
     List<Integer> neededColumnIds = new ArrayList<Integer>();

http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 16957b6..1076dfd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9324,6 +9324,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     if (top == null) {
+      // Determine the row schema for TSOP.
+      // Include column names from the SerDe, plus the partition and virtual columns.
       rwsch = new RowResolver();
       try {
         StructObjectInspector rowObjectInspector = (StructObjectInspector) tab

http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
index 10086c5..cbad3b2 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
@@ -22,9 +22,12 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.StringUtils;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
 
@@ -33,6 +36,7 @@ import com.google.common.collect.Lists;
  *
  */
 public final class ColumnProjectionUtils {
+  public static final Log LOG = LogFactory.getLog(ColumnProjectionUtils.class);
 
   public static final String READ_COLUMN_IDS_CONF_STR = "hive.io.file.readcolumn.ids";
   public static final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns";
@@ -54,6 +58,7 @@ public final class ColumnProjectionUtils {
    * and appendReadColumns
    */
   @Deprecated
+  @VisibleForTesting
   public static void setReadColumnIDs(Configuration conf, List<Integer> ids) {
     setReadColumnIDConf(conf, READ_COLUMN_IDS_CONF_STR_DEFAULT);
     appendReadColumns(conf, ids);
@@ -102,8 +107,21 @@ public final class ColumnProjectionUtils {
     conf.setBoolean(READ_ALL_COLUMNS, false);
   }
 
+  /**
+   * Appends read column information to the configuration for use in PPD. It is
+   * currently called with information from TSOP. Names come from the TSOP input RowSchema, and
+   * IDs are the indexes inside the schema (which PPD assumes correspond to indexes inside the
+   * files being read; an assumption that would be invalid in many cases of schema evolution).
+   * @param conf Config to set values to.
+   * @param ids Column ids.
+   * @param names Column names.
+   */
   public static void appendReadColumns(
       Configuration conf, List<Integer> ids, List<String> names) {
+    if (ids.size() != names.size()) {
+      LOG.warn("Read column counts do not match: "
+          + ids.size() + " ids, " + names.size() + " names");
+    }
     appendReadColumns(conf, ids);
     appendReadColumnNames(conf, names);
   }
@@ -125,9 +143,13 @@ public final class ColumnProjectionUtils {
     List<Integer> result = new ArrayList<Integer>(list.length);
     for (String element : list) {
       // it may contain duplicates, remove duplicates
+      // TODO: WTF? This would break many assumptions elsewhere if it did.
+      //       Column names' and column ids' lists are supposed to be correlated.
       Integer toAdd = Integer.parseInt(element);
       if (!result.contains(toAdd)) {
         result.add(toAdd);
+      } else if (LOG.isInfoEnabled()) {
+        LOG.info("Duplicate ID " + toAdd + " in column ID list");
       }
     }
     return result;

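For reference, a minimal usage sketch of the three-argument appendReadColumns added
above (the column names and positions here are made up); it populates the id and name
properties together and clears the read-all-columns flag:

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    public class AppendReadColumnsExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // ids are positions in the TSOP input RowSchema; names are the matching column names.
        ColumnProjectionUtils.appendReadColumns(conf,
            Arrays.asList(0, 2), Arrays.asList("id", "amount"));
        // The two lists must stay correlated; a size mismatch now logs a warning.
        System.out.println(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));
        System.out.println(conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR));
        System.out.println(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true));
      }
    }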
http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
index 0778935..8fda95c 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
@@ -19,10 +19,13 @@
 package org.apache.hadoop.hive.ql.io.sarg;
 
 /**
- * A factory for creating SearchArguments.
+ * A factory for creating SearchArguments, as well as modifying those created by this factory.
  */
 public class SearchArgumentFactory {
   public static SearchArgument.Builder newBuilder() {
     return new SearchArgumentImpl.BuilderImpl();
   }
+  public static void setPredicateLeafColumn(PredicateLeaf leaf, String newName) {
+    SearchArgumentImpl.PredicateLeafImpl.setColumnName(leaf, newName);
+  }
 }

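The new setPredicateLeafColumn hook lets callers rewrite the column name a SARG leaf
refers to after the SearchArgument has been built. A hedged sketch of one way it could be
used, for example to translate schema-level names to file-level names before pushdown;
the name mapping itself is hypothetical:

    import java.util.List;
    import java.util.Map;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

    class LeafRenameSketch {
      /** Rewrites every leaf whose column name appears in the (hypothetical) mapping. */
      static void renameLeafColumns(SearchArgument sarg, Map<String, String> mapping) {
        List<PredicateLeaf> leaves = sarg.getLeaves();
        for (PredicateLeaf leaf : leaves) {
          String newName = mapping.get(leaf.getColumnName());
          if (newName != null) {
            SearchArgumentFactory.setPredicateLeafColumn(leaf, newName);
          }
        }
      }
    }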
http://git-wip-us.apache.org/repos/asf/hive/blob/ba0b33c1/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
index d27ac16..a762b8b 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -40,7 +40,7 @@ final class SearchArgumentImpl implements SearchArgument {
   static final class PredicateLeafImpl implements PredicateLeaf {
     private final Operator operator;
     private final Type type;
-    private final String columnName;
+    private String columnName;
     private final Object literal;
     private final List<Object> literalList;
 
@@ -165,6 +165,11 @@ final class SearchArgumentImpl implements SearchArgument {
              (literalList == null ? 0 : literalList.hashCode()) *
                  103 * 101 * 3 * 17;
     }
+
+    public static void setColumnName(PredicateLeaf leaf, String newName) {
+      assert leaf instanceof PredicateLeafImpl;
+      ((PredicateLeafImpl)leaf).columnName = newName;
+    }
   }