Posted to commits@hive.apache.org by pr...@apache.org on 2014/10/14 21:07:05 UTC

svn commit: r1631841 [11/42] - in /hive/branches/llap: ./ accumulo-handler/ accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/ accumulo-handler/src/java/org/apache/hadoop/hiv...

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java Tue Oct 14 19:06:45 2014
@@ -418,138 +418,120 @@ class RunLengthIntegerWriterV2 implement
 
   private void determineEncoding() {
 
-    int idx = 0;
+    // we need to compute zigzag values for DIRECT encoding if we decide to
+    // break early for delta overflows or for shorter runs
+    computeZigZagLiterals();
 
-    // for identifying monotonic sequences
-    boolean isIncreasing = false;
-    int increasingCount = 1;
-    boolean isDecreasing = false;
-    int decreasingCount = 1;
+    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
 
-    // for identifying type of delta encoding
-    min = literals[0];
-    long max = literals[0];
-    isFixedDelta = true;
-    long currDelta = 0;
+    // determining the encoding is not a big win for shorter runs
+    if (numLiterals <= MIN_REPEAT) {
+      encoding = EncodingType.DIRECT;
+      return;
+    }
 
-    min = literals[0];
-    long deltaMax = 0;
+    // DELTA encoding check
 
-    // populate all variables to identify the encoding type
-    if (numLiterals >= 1) {
-      currDelta = literals[1] - literals[0];
-      for(int i = 0; i < numLiterals; i++) {
-        if (i > 0 && literals[i] >= max) {
-          max = literals[i];
-          increasingCount++;
-        }
+    // for identifying monotonic sequences
+    boolean isIncreasing = true;
+    boolean isDecreasing = true;
+    this.isFixedDelta = true;
 
-        if (i > 0 && literals[i] <= min) {
-          min = literals[i];
-          decreasingCount++;
-        }
+    this.min = literals[0];
+    long max = literals[0];
+    final long initialDelta = literals[1] - literals[0];
+    long currDelta = initialDelta;
+    long deltaMax = initialDelta;
+    this.adjDeltas[0] = initialDelta;
+
+    for (int i = 1; i < numLiterals; i++) {
+      final long l1 = literals[i];
+      final long l0 = literals[i - 1];
+      currDelta = l1 - l0;
+      min = Math.min(min, l1);
+      max = Math.max(max, l1);
+
+      isIncreasing &= (l0 <= l1);
+      isDecreasing &= (l0 >= l1);
+
+      isFixedDelta &= (currDelta == initialDelta);
+      if (i > 1) {
+        adjDeltas[i - 1] = Math.abs(currDelta);
+        deltaMax = Math.max(deltaMax, adjDeltas[i - 1]);
+      }
+    }
 
-        // if delta doesn't changes then mark it as fixed delta
-        if (i > 0 && isFixedDelta) {
-          if (literals[i] - literals[i - 1] != currDelta) {
-            isFixedDelta = false;
-          }
+    // it's faster to exit under the delta overflow condition without checking
+    // the PATCHED_BASE condition, as DIRECT encoding is faster and has less
+    // overhead than PATCHED_BASE
+    if (!utils.isSafeSubtract(max, min)) {
+      encoding = EncodingType.DIRECT;
+      return;
+    }
 
-          fixedDelta = currDelta;
-        }
+    // invariant - subtracting any number from any other in the literals after
+    // this point won't overflow
 
-        // populate zigzag encoded literals
-        long zzEncVal = 0;
-        if (signed) {
-          zzEncVal = utils.zigzagEncode(literals[i]);
-        } else {
-          zzEncVal = literals[i];
-        }
-        zigzagLiterals[idx] = zzEncVal;
-        idx++;
+    // if initialDelta is 0 then we cannot delta encode as we cannot identify
+    // the sign of deltas (increasing or decreasing)
+    if (initialDelta != 0) {
+
+      // if min equals max then the delta is 0; this happens for a fixed-value
+      // run longer than 10, which cannot be encoded with SHORT_REPEAT
+      if (min == max) {
+        assert isFixedDelta : min + "==" + max +
+            ", isFixedDelta cannot be false";
+        assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
+        fixedDelta = 0;
+        encoding = EncodingType.DELTA;
+        return;
+      }
 
-        // max delta value is required for computing the fixed bits
-        // required for delta blob in delta encoding
-        if (i > 0) {
-          if (i == 1) {
-            // first value preserve the sign
-            adjDeltas[i - 1] = literals[i] - literals[i - 1];
-          } else {
-            adjDeltas[i - 1] = Math.abs(literals[i] - literals[i - 1]);
-            if (adjDeltas[i - 1] > deltaMax) {
-              deltaMax = adjDeltas[i - 1];
-            }
-          }
-        }
+      if (isFixedDelta) {
+        assert currDelta == initialDelta
+            : "currDelta should be equal to initialDelta for fixed delta encoding";
+        encoding = EncodingType.DELTA;
+        fixedDelta = currDelta;
+        return;
       }
 
       // stores the number of bits required for packing delta blob in
       // delta encoding
       bitsDeltaMax = utils.findClosestNumBits(deltaMax);
 
-      // if decreasing count equals total number of literals then the
-      // sequence is monotonically decreasing
-      if (increasingCount == 1 && decreasingCount == numLiterals) {
-        isDecreasing = true;
-      }
-
-      // if increasing count equals total number of literals then the
-      // sequence is monotonically increasing
-      if (decreasingCount == 1 && increasingCount == numLiterals) {
-        isIncreasing = true;
+      // monotonic condition
+      if (isIncreasing || isDecreasing) {
+        encoding = EncodingType.DELTA;
+        return;
       }
     }
 
-    // if the sequence is both increasing and decreasing then it is not
-    // monotonic
-    if (isDecreasing && isIncreasing) {
-      isDecreasing = false;
-      isIncreasing = false;
-    }
-
-    // fixed delta condition
-    if (isIncreasing == false && isDecreasing == false && isFixedDelta == true) {
-      encoding = EncodingType.DELTA;
-      return;
-    }
-
-    // monotonic condition
-    if (isIncreasing || isDecreasing) {
-      encoding = EncodingType.DELTA;
-      return;
-    }
+    // PATCHED_BASE encoding check
 
     // percentile values are computed for the zigzag encoded values. if the
    // number of bits required between the 90th and 100th percentile varies
     // beyond a threshold then we need to patch the values. if the variation
-    // is not significant then we can use direct or delta encoding
-
-    double p = 0.9;
-    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p);
-
-    p = 1.0;
-    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p);
+    // is not significant then we can use direct encoding
 
+    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 0.9);
     int diffBitsLH = zzBits100p - zzBits90p;
 
    // if the difference between 90th percentile and 100th percentile fixed
    // bits is > 1 then we need to patch the values
-    if (isIncreasing == false && isDecreasing == false && diffBitsLH > 1
-        && isFixedDelta == false) {
+    if (diffBitsLH > 1) {
+
       // patching is done only on base reduced values.
       // remove base from literals
-      for(int i = 0; i < numLiterals; i++) {
+      for (int i = 0; i < numLiterals; i++) {
         baseRedLiterals[i] = literals[i] - min;
       }
 
       // 95th percentile width is used to determine max allowed value
       // after which patching will be done
-      p = 0.95;
-      brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p);
+      brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
 
       // 100th percentile is used to compute the max patch width
-      p = 1.0;
-      brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p);
+      brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0);
 
       // after base reducing the values, if the difference in bits between
       // 95th percentile and 100th percentile value is zero then there
@@ -565,19 +547,24 @@ class RunLengthIntegerWriterV2 implement
         encoding = EncodingType.DIRECT;
         return;
       }
-    }
-
-    // if difference in bits between 95th percentile and 100th percentile is
-    // 0, then patch length will become 0. Hence we will fallback to direct
-    if (isIncreasing == false && isDecreasing == false && diffBitsLH <= 1
-        && isFixedDelta == false) {
+    } else {
+      // if the difference in bits between the 95th percentile and 100th
+      // percentile is 0, then the patch length becomes 0; hence we fall
+      // back to DIRECT
       encoding = EncodingType.DIRECT;
       return;
     }
+  }
 
-    // this should not happen
-    if (encoding == null) {
-      throw new RuntimeException("Integer encoding cannot be determined.");
+  private void computeZigZagLiterals() {
+    // populate zigzag encoded literals
+    long zzEncVal = 0;
+    for (int i = 0; i < numLiterals; i++) {
+      if (signed) {
+        zzEncVal = utils.zigzagEncode(literals[i]);
+      } else {
+        zzEncVal = literals[i];
+      }
+      zigzagLiterals[i] = zzEncVal;
     }
   }
 
@@ -700,7 +687,7 @@ class RunLengthIntegerWriterV2 implement
     patchWidth = 0;
     gapVsPatchList = null;
     min = 0;
-    isFixedDelta = false;
+    isFixedDelta = true;
   }
 
   @Override

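For reference, the zigzag transform applied by computeZigZagLiterals() via
utils.zigzagEncode() interleaves negative and non-negative values so that
small magnitudes need few bits, which is what percentileBits() then measures.
A standalone sketch, assumed to match the semantics of the SerializationUtils
helpers (not shown in this diff):

    public class ZigZagSketch {
      // 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
      static long zigzagEncode(long val) {
        return (val << 1) ^ (val >> 63);
      }

      static long zigzagDecode(long val) {
        return (val >>> 1) ^ -(val & 1);
      }

      public static void main(String[] args) {
        System.out.println(zigzagEncode(-3));                  // 5
        System.out.println(zigzagDecode(zigzagEncode(-123)));  // -123
      }
    }
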
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java Tue Oct 14 19:06:45 2014
@@ -1283,4 +1283,9 @@ final class SerializationUtils {
         + ((readBuffer[rbOffset + 7] & 255) << 0));
   }
 
+  // Do not want to use Guava LongMath.checkedSubtract() here as it will throw
+  // ArithmeticException in case of overflow
+  public boolean isSafeSubtract(long left, long right) {
+    return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
+  }
 }

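The new isSafeSubtract() is the standard sign-bit overflow test: left - right
can only wrap when left and right have opposite signs and the result's sign
differs from left's. The non-short-circuit '|' is presumably a deliberate
branchless choice. An illustrative driver, not part of the commit:

    public class SafeSubtractDemo {
      static boolean isSafeSubtract(long left, long right) {
        return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
      }

      public static void main(String[] args) {
        System.out.println(isSafeSubtract(5L, 3L));               // true
        System.out.println(isSafeSubtract(Long.MAX_VALUE, -1L));  // false, wraps to MIN_VALUE
        System.out.println(isSafeSubtract(Long.MIN_VALUE, 1L));   // false, wraps to MAX_VALUE
      }
    }
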
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Oct 14 19:06:45 2014
@@ -485,6 +485,7 @@ class WriterImpl implements Writer, Memo
         modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY);
         break;
       default:
+        LOG.warn("Missing ORC compression modifiers for " + kind);
         modifiers = null;
         break;
       }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java Tue Oct 14 19:06:45 2014
@@ -24,6 +24,8 @@ import java.util.zip.DataFormatException
 import java.util.zip.Deflater;
 import java.util.zip.Inflater;
 
+import javax.annotation.Nullable;
+
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
 import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
@@ -130,7 +132,12 @@ class ZlibCodec implements CompressionCo
   }
 
   @Override
-  public CompressionCodec modify(EnumSet<Modifier> modifiers) {
+  public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) {
+
+    if (modifiers == null) {
+      return this;
+    }
+
     int l = this.level;
     int s = this.strategy;
 

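Together with the WriterImpl change above, null presumably now acts as a
"no modifiers" value: WriterImpl warns and passes null for compression kinds
it has no modifiers for, and the codec keeps its current settings. A
simplified sketch of that contract with stand-in names (the real
CompressionCodec and Modifier types are package-private in the ORC writer):

    import java.util.EnumSet;

    class NullTolerantCodec {
      enum Modifier { FASTEST, FAST, DEFAULT, TEXT, BINARY }

      NullTolerantCodec modify(EnumSet<Modifier> modifiers) {
        if (modifiers == null) {
          return this;  // unknown compression kind: keep current level/strategy
        }
        // ...apply level/strategy adjustments per modifier...
        return this;
      }
    }
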
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java Tue Oct 14 19:06:45 2014
@@ -13,9 +13,6 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -33,7 +30,7 @@ public class ArrayWritableGroupConverter
   private Writable[] mapPairContainer;
 
   public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
-      final int index, List<TypeInfo> hiveSchemaTypeInfos) {
+      final int index) {
     this.parent = parent;
     this.index = index;
     int count = groupType.getFieldCount();
@@ -43,8 +40,7 @@ public class ArrayWritableGroupConverter
     isMap = count == 2;
     converters = new Converter[count];
     for (int i = 0; i < count; i++) {
-      converters[i] = getConverterFromDescription(groupType.getType(i), i, this,
-          hiveSchemaTypeInfos);
+      converters[i] = getConverterFromDescription(groupType.getType(i), i, this);
     }
   }
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java Tue Oct 14 19:06:45 2014
@@ -16,7 +16,6 @@ package org.apache.hadoop.hive.ql.io.par
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -37,21 +36,19 @@ public class DataWritableGroupConverter 
   private final Object[] currentArr;
   private Writable[] rootMap;
 
-  public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema,
-      final List<TypeInfo> hiveSchemaTypeInfos) {
-    this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos);
+  public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+    this(requestedSchema, null, 0, tableSchema);
     final int fieldCount = tableSchema.getFieldCount();
     this.rootMap = new Writable[fieldCount];
   }
 
   public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
-      final int index, final List<TypeInfo> hiveSchemaTypeInfos) {
-    this(groupType, parent, index, groupType, hiveSchemaTypeInfos);
+      final int index) {
+    this(groupType, parent, index, groupType);
   }
 
   public DataWritableGroupConverter(final GroupType selectedGroupType,
-      final HiveGroupConverter parent, final int index, final GroupType containingGroupType,
-      final List<TypeInfo> hiveSchemaTypeInfos) {
+      final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
     this.parent = parent;
     this.index = index;
     final int totalFieldCount = containingGroupType.getFieldCount();
@@ -65,8 +62,7 @@ public class DataWritableGroupConverter 
       Type subtype = selectedFields.get(i);
       if (containingGroupType.getFields().contains(subtype)) {
         converters[i] = getConverterFromDescription(subtype,
-            containingGroupType.getFieldIndex(subtype.getName()), this,
-            hiveSchemaTypeInfos);
+            containingGroupType.getFieldIndex(subtype.getName()), this);
       } else {
         throw new IllegalStateException("Group type [" + containingGroupType +
             "] does not contain requested field: " + subtype);

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Tue Oct 14 19:06:45 2014
@@ -31,10 +31,8 @@ public class DataWritableRecordConverter
 
   private final DataWritableGroupConverter root;
 
-  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema,
-      final List<TypeInfo> hiveColumnTypeInfos) {
-    this.root = new DataWritableGroupConverter(requestedSchema, tableSchema,
-        hiveColumnTypeInfos);
+  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+    this.root = new DataWritableGroupConverter(requestedSchema, tableSchema);
   }
 
   @Override

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Tue Oct 14 19:06:45 2014
@@ -16,19 +16,12 @@ package org.apache.hadoop.hive.ql.io.par
 import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.List;
 
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -152,32 +145,6 @@ public enum ETypeConverter {
         }
       };
     }
-  },
-  ECHAR_CONVERTER(HiveCharWritable.class) {
-    @Override
-    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
-      return new BinaryConverter<HiveCharWritable>(type, parent, index) {
-        @Override
-        protected HiveCharWritable convert(Binary binary) {
-          HiveChar hiveChar = new HiveChar();
-          hiveChar.setValue(binary.toStringUsingUTF8());
-          return new HiveCharWritable(hiveChar);
-        }
-      };
-    }
-  },
-  EVARCHAR_CONVERTER(HiveVarcharWritable.class) {
-    @Override
-    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
-      return new BinaryConverter<HiveVarcharWritable>(type, parent, index) {
-        @Override
-        protected HiveVarcharWritable convert(Binary binary) {
-          HiveVarchar hiveVarchar = new HiveVarchar();
-          hiveVarchar.setValue(binary.toStringUsingUTF8());
-          return new HiveVarcharWritable(hiveVarchar);
-        }
-      };
-    }
   };
 
   final Class<?> _type;
@@ -193,7 +160,7 @@ public enum ETypeConverter {
   abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
 
   public static Converter getNewConverter(final PrimitiveType type, final int index,
-      final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
+      final HiveGroupConverter parent) {
     if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
       //TODO- cleanup once parquet support Timestamp type annotation.
       return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
@@ -201,15 +168,7 @@ public enum ETypeConverter {
     if (OriginalType.DECIMAL == type.getOriginalType()) {
       return EDECIMAL_CONVERTER.getConverter(type, index, parent);
     } else if (OriginalType.UTF8 == type.getOriginalType()) {
-      if (hiveSchemaTypeInfos.get(index).getTypeName()
-          .startsWith(serdeConstants.CHAR_TYPE_NAME)) {
-        return ECHAR_CONVERTER.getConverter(type, index, parent);
-      } else if (hiveSchemaTypeInfos.get(index).getTypeName()
-          .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
-        return EVARCHAR_CONVERTER.getConverter(type, index, parent);
-      } else if (type.isPrimitive()) {
-        return ESTRING_CONVERTER.getConverter(type, index, parent);
-      }
+      return ESTRING_CONVERTER.getConverter(type, index, parent);
     }
 
     Class<?> javaType = type.getPrimitiveTypeName().javaType;

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Tue Oct 14 19:06:45 2014
@@ -13,9 +13,6 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.Writable;
 
 import parquet.io.api.Converter;
@@ -26,20 +23,17 @@ import parquet.schema.Type.Repetition;
 public abstract class HiveGroupConverter extends GroupConverter {
 
   protected static Converter getConverterFromDescription(final Type type, final int index,
-      final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
+      final HiveGroupConverter parent) {
     if (type == null) {
       return null;
     }
     if (type.isPrimitive()) {
-      return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent,
-          hiveSchemaTypeInfos);
+      return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent);
     } else {
       if (type.asGroupType().getRepetition() == Repetition.REPEATED) {
-        return new ArrayWritableGroupConverter(type.asGroupType(), parent, index,
-            hiveSchemaTypeInfos);
+        return new ArrayWritableGroupConverter(type.asGroupType(), parent, index);
       } else {
-        return new DataWritableGroupConverter(type.asGroupType(), parent, index,
-            hiveSchemaTypeInfos);
+        return new DataWritableGroupConverter(type.asGroupType(), parent, index);
       }
     }
   }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Tue Oct 14 19:06:45 2014
@@ -14,7 +14,6 @@
 package org.apache.hadoop.hive.ql.io.parquet.read;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -24,8 +23,6 @@ import org.apache.hadoop.hive.ql.io.IOCo
 import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.util.StringUtils;
 
@@ -56,7 +53,7 @@ public class DataWritableReadSupport ext
   * From a comma-separated string of column names (including Hive virtual
   * columns), return a list of column names
    *
-   * @param comma separated list of columns
+   * @param columns comma separated list of columns
    * @return list with virtual columns removed
    */
   private static List<String> getColumns(final String columns) {
@@ -64,27 +61,6 @@ public class DataWritableReadSupport ext
         removeVirtualColumns(StringUtils.getStringCollection(columns));
   }
 
-  private static List<TypeInfo> getColumnTypes(Configuration configuration) {
-
-    List<String> columnNames;
-    String columnNamesProperty = configuration.get(IOConstants.COLUMNS);
-    if (columnNamesProperty.length() == 0) {
-      columnNames = new ArrayList<String>();
-    } else {
-      columnNames = Arrays.asList(columnNamesProperty.split(","));
-    }
-    List<TypeInfo> columnTypes;
-    String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES);
-    if (columnTypesProperty.length() == 0) {
-      columnTypes = new ArrayList<TypeInfo>();
-    } else {
-      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
-    }
-
-    columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes);
-    return columnTypes;
-  }
-
   /**
    *
    * It creates the readContext for Parquet side with the requested schema during the init phase.
@@ -99,6 +75,7 @@ public class DataWritableReadSupport ext
       final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
     final String columns = configuration.get(IOConstants.COLUMNS);
     final Map<String, String> contextMetadata = new HashMap<String, String>();
+    final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
     if (columns != null) {
       final List<String> listColumns = getColumns(columns);
       final Map<String, String> lowerCaseFileSchemaColumns = new HashMap<String,String>();
@@ -106,45 +83,50 @@ public class DataWritableReadSupport ext
         lowerCaseFileSchemaColumns.put(c.getPath()[0].toLowerCase(), c.getPath()[0]);
       }
       final List<Type> typeListTable = new ArrayList<Type>();
-      for (String col : listColumns) {
-        col = col.toLowerCase();
-        // listColumns contains partition columns which are metadata only
-        if (lowerCaseFileSchemaColumns.containsKey(col)) {
-          typeListTable.add(fileSchema.getType(lowerCaseFileSchemaColumns.get(col)));
-        } else {
-          // below allows schema evolution
-          typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
+      if (indexAccess) {
+        for (int index = 0; index < listColumns.size(); index++) {
+          //Take columns based on index or pad the field
+          if(index < fileSchema.getFieldCount()) {
+            typeListTable.add(fileSchema.getType(index));
+          } else {
+            //prefixing with '_mask_' to ensure no conflict with named
+            //columns in the file schema
+            typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "_mask_"+listColumns.get(index)));
+          }
+        }
+      } else {
+        for (String col : listColumns) {
+          col = col.toLowerCase();
+          // listColumns contains partition columns which are metadata only
+          if (lowerCaseFileSchemaColumns.containsKey(col)) {
+            typeListTable.add(fileSchema.getType(lowerCaseFileSchemaColumns.get(col)));
+          } else {
+            // below allows schema evolution
+            typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
+          }
         }
       }
       MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable);
       contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());
 
-      MessageType requestedSchemaByUser = tableSchema;
       final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
 
       final List<Type> typeListWanted = new ArrayList<Type>();
-      final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
+
       for (final Integer idx : indexColumnsWanted) {
         if (idx < listColumns.size()) {
           String col = listColumns.get(idx);
           if (indexAccess) {
-            typeListWanted.add(tableSchema.getType(col));
+            typeListWanted.add(fileSchema.getFields().get(idx));
           } else {
             col = col.toLowerCase();
             if (lowerCaseFileSchemaColumns.containsKey(col)) {
               typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col)));
-            } else {
-              // should never occur?
-              String msg = "Column " + col + " at index " + idx + " does not exist in " +
-              lowerCaseFileSchemaColumns;
-              throw new IllegalStateException(msg);
             }
           }
         }
       }
-      requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
-              typeListWanted), fileSchema, configuration);
-
+      MessageType requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted);
       return new ReadContext(requestedSchemaByUser, contextMetadata);
     } else {
       contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
@@ -171,27 +153,7 @@ public class DataWritableReadSupport ext
       throw new IllegalStateException("ReadContext not initialized properly. " +
         "Don't know the Hive Schema.");
     }
-    final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
-        parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
-    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema,
-        getColumnTypes(configuration));
-  }
-
-  /**
-  * Determine the file column names based on the position within the requested columns and
-  * use that as the requested schema.
-  */
-  private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
-          Configuration configuration) {
-    if (configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
-      final List<String> listColumns = getColumns(configuration.get(IOConstants.COLUMNS));
-      List<Type> requestedTypes = new ArrayList<Type>();
-      for(Type t : requestedSchema.getFields()) {
-        int index = listColumns.indexOf(t.getName());
-        requestedTypes.add(fileSchema.getType(index));
-      }
-      requestedSchema = new MessageType(requestedSchema.getName(), requestedTypes);
-    }
-    return requestedSchema;
+    final MessageType tableSchema = MessageTypeParser.parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
+    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
   }
 }

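The new indexAccess path in init() resolves requested columns purely by
position in the file schema, padding Hive columns that extend past the file's
field count, and replaces the removed name-based resolveSchemaAccess(). A
simplified illustration of the positional strategy, with plain strings
standing in for parquet Type objects:

    import java.util.ArrayList;
    import java.util.List;

    class IndexAccessSketch {
      // Positional resolution: take the file column at each index; Hive
      // columns beyond the file schema get a "_mask_" placeholder, mirroring
      // the padding above that avoids clashes with named file columns.
      static List<String> resolveByIndex(List<String> hiveCols, List<String> fileCols) {
        List<String> out = new ArrayList<String>();
        for (int i = 0; i < hiveCols.size(); i++) {
          out.add(i < fileCols.size() ? fileCols.get(i) : "_mask_" + hiveCols.get(i));
        }
        return out;
      }
    }
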
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java Tue Oct 14 19:06:45 2014
@@ -14,6 +14,7 @@
 package org.apache.hadoop.hive.ql.io.parquet.timestamp;
 
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 
 import parquet.Preconditions;
 import parquet.io.api.Binary;
@@ -28,7 +29,10 @@ public class NanoTime {
   public static NanoTime fromBinary(Binary bytes) {
     Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes");
     ByteBuffer buf = bytes.toByteBuffer();
-    return new NanoTime(buf.getInt(), buf.getLong());
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    long timeOfDayNanos = buf.getLong();
+    int julianDay = buf.getInt();
+    return new NanoTime(julianDay, timeOfDayNanos);
   }
 
   public NanoTime(int julianDay, long timeOfDayNanos) {
@@ -46,8 +50,9 @@ public class NanoTime {
 
   public Binary toBinary() {
     ByteBuffer buf = ByteBuffer.allocate(12);
-    buf.putInt(julianDay);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
     buf.putLong(timeOfDayNanos);
+    buf.putInt(julianDay);
     buf.flip();
     return Binary.fromByteBuffer(buf);
   }
@@ -60,4 +65,4 @@ public class NanoTime {
   public String toString() {
     return "NanoTime{julianDay="+julianDay+", timeOfDayNanos="+timeOfDayNanos+"}";
   }
-}
\ No newline at end of file
+}

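The byte-order fix matters because parquet INT96 timestamps are laid out as
8 bytes of nanos-of-day followed by 4 bytes of Julian day, little-endian; the
old code read big-endian with the two fields swapped. A round-trip sketch of
the corrected layout (values are arbitrary examples):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    public class NanoTimeLayoutDemo {
      public static void main(String[] args) {
        long timeOfDayNanos = 123456789L;
        int julianDay = 2440588;  // example: the Julian day number of 1970-01-01

        ByteBuffer buf = ByteBuffer.allocate(12);
        buf.order(ByteOrder.LITTLE_ENDIAN);
        buf.putLong(timeOfDayNanos);  // 8 bytes first
        buf.putInt(julianDay);        // then 4 bytes
        buf.flip();

        System.out.println(buf.getLong() == timeOfDayNanos);  // true
        System.out.println(buf.getInt() == julianDay);        // true
      }
    }
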
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java Tue Oct 14 19:06:45 2014
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.lockmg
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.thrift.TException;
@@ -42,10 +43,10 @@ public class DbLockManager implements Hi
   private static final long MAX_SLEEP = 15000;
   private HiveLockManagerCtx context;
   private Set<DbHiveLock> locks;
-  private HiveMetaStoreClient client;
+  private IMetaStoreClient client;
   private long nextSleep = 50;
 
-  DbLockManager(HiveMetaStoreClient client) {
+  DbLockManager(IMetaStoreClient client) {
     locks = new HashSet<DbHiveLock>();
     this.client = client;
   }
@@ -210,8 +211,8 @@ public class DbLockManager implements Hi
   /**
    * Clear the memory of the locks in this object.  This won't clear the locks from the database.
    * It is for use with
-   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.HiveMetaStoreClient).commitTxn} and
-   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.HiveMetaStoreClient).rollbackTxn}.
+   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient).commitTxn} and
+   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient).rollbackTxn}.
    */
   void clearLocalLockRecords() {
     locks.clear();

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java Tue Oct 14 19:06:45 2014
@@ -31,6 +31,8 @@ import org.apache.hadoop.hive.ql.QueryPl
 import org.apache.hadoop.hive.ql.hooks.Entity;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.thrift.TException;
 
@@ -46,7 +48,7 @@ public class DbTxnManager extends HiveTx
   static final private Log LOG = LogFactory.getLog(CLASS_NAME);
 
   private DbLockManager lockMgr = null;
-  private HiveMetaStoreClient client = null;
+  private IMetaStoreClient client = null;
   private long txnId = 0;
 
   DbTxnManager() {
@@ -284,7 +286,7 @@ public class DbTxnManager extends HiveTx
   public ValidTxnList getValidTxns() throws LockException {
     init();
     try {
-      return client.getValidTxns();
+      return client.getValidTxns(txnId);
     } catch (TException e) {
       throw new LockException(ErrorMsg.METASTORE_COMMUNICATION_FAILED.getMsg(),
           e);
@@ -311,7 +313,6 @@ public class DbTxnManager extends HiveTx
     try {
       if (txnId > 0) rollbackTxn();
       if (lockMgr != null) lockMgr.close();
-      if (client != null) client.close();
     } catch (Exception e) {
       LOG.error("Caught exception " + e.getClass().getName() + " with message <" + e.getMessage()
       + ">, swallowing as there is nothing we can do with it.");
@@ -326,10 +327,12 @@ public class DbTxnManager extends HiveTx
             "methods.");
       }
       try {
-        client = new HiveMetaStoreClient(conf);
+        Hive db = Hive.get(conf);
+        client = db.getMSC();
       } catch (MetaException e) {
-        throw new LockException(ErrorMsg.METASTORE_COULD_NOT_INITIATE.getMsg(),
-            e);
+        throw new LockException(ErrorMsg.METASTORE_COULD_NOT_INITIATE.getMsg(), e);
+      } catch (HiveException e) {
+        throw new LockException(ErrorMsg.METASTORE_COULD_NOT_INITIATE.getMsg(), e);
       }
     }
   }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java Tue Oct 14 19:06:45 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.lockmgr
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
 import org.apache.hadoop.hive.ql.metadata.*;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.data.Stat;
 import org.apache.zookeeper.KeeperException;
@@ -73,31 +74,6 @@ public class ZooKeeperHiveLockManager im
   }
 
   /**
-   * @param conf  The hive configuration
-   * Get the quorum server address from the configuration. The format is:
-   * host1:port, host2:port..
-   **/
-  @VisibleForTesting
-  static String getQuorumServers(HiveConf conf) {
-    String[] hosts = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM).split(",");
-    String port = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT);
-    StringBuilder quorum = new StringBuilder();
-    for(int i=0; i<hosts.length; i++) {
-      quorum.append(hosts[i].trim());
-      if (!hosts[i].contains(":")) {
-        // if the hostname doesn't contain a port, add the configured port to hostname
-        quorum.append(":");
-        quorum.append(port);
-      }
-
-      if (i != hosts.length-1)
-        quorum.append(",");
-    }
-
-    return quorum.toString();
-  }
-
-  /**
    * @param ctx  The lock manager context (containing the Hive configuration file)
    * Start the ZooKeeper client based on the zookeeper cluster specified in the conf.
    **/
@@ -105,7 +81,7 @@ public class ZooKeeperHiveLockManager im
     this.ctx = ctx;
     HiveConf conf = ctx.getConf();
     sessionTimeout = conf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT);
-    quorumServers = ZooKeeperHiveLockManager.getQuorumServers(conf);
+    quorumServers = ZooKeeperHiveHelper.getQuorumServers(conf);
 
     sleepTime = conf.getTimeVar(
         HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES, TimeUnit.MILLISECONDS);
@@ -146,7 +122,7 @@ public class ZooKeeperHiveLockManager im
       return;
     }
 
-    zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, new DummyWatcher());
+    zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, new ZooKeeperHiveHelper.DummyWatcher());
   }
 
   /**
@@ -517,8 +493,8 @@ public class ZooKeeperHiveLockManager im
     ZooKeeper zkpClient = null;
     try {
       int sessionTimeout = conf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT);
-      String quorumServers = getQuorumServers(conf);
-      Watcher dummyWatcher = new DummyWatcher();
+      String quorumServers = ZooKeeperHiveHelper.getQuorumServers(conf);
+      Watcher dummyWatcher = new ZooKeeperHiveHelper.DummyWatcher();
       zkpClient = new ZooKeeper(quorumServers, sessionTimeout, dummyWatcher);
       String parent = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_NAMESPACE);
       List<HiveLock> locks = getLocks(conf, zkpClient, null, parent, false, false);
@@ -629,7 +605,8 @@ public class ZooKeeperHiveLockManager im
 
         if (fetchData) {
           try {
-            data = new HiveLockObjectData(new String(zkpClient.getData(curChild, new DummyWatcher(), null)));
+            data = new HiveLockObjectData(new String(zkpClient.getData(curChild,
+                new ZooKeeperHiveHelper.DummyWatcher(), null)));
             data.setClientIp(clientIp);
           } catch (Exception e) {
             LOG.error("Error in getting data for " + curChild, e);
@@ -789,11 +766,6 @@ public class ZooKeeperHiveLockManager im
     return null;
   }
 
-  public static class DummyWatcher implements Watcher {
-    public void process(org.apache.zookeeper.WatchedEvent event)  {
-    }
-  }
-
   @Override
   public void prepareRetry() throws LockException {
     try {

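getQuorumServers() and DummyWatcher moved to the shared ZooKeeperHiveHelper.
Restating the relocated quorum builder as a standalone sketch, based on the
removed method above rather than the ZooKeeperHiveHelper source: any host
without an explicit port gets the configured client port appended.

    public class QuorumSketch {
      static String quorumServers(String[] hosts, String clientPort) {
        StringBuilder quorum = new StringBuilder();
        for (int i = 0; i < hosts.length; i++) {
          quorum.append(hosts[i].trim());
          if (!hosts[i].contains(":")) {
            // hostname has no port: append the configured client port
            quorum.append(":").append(clientPort);
          }
          if (i != hosts.length - 1) {
            quorum.append(",");
          }
        }
        return quorum.toString();
      }

      public static void main(String[] args) {
        String[] hosts = {"zk1", "zk2:2182", " zk3"};
        System.out.println(quorumServers(hosts, "2181"));
        // prints: zk1:2181,zk2:2182,zk3:2181
      }
    }
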
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Oct 14 19:06:45 2014
@@ -109,8 +109,8 @@ import org.apache.hadoop.hive.serde2.laz
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
 
 import com.google.common.collect.Sets;
@@ -378,6 +378,27 @@ public class Hive {
       List<String> partCols, Class<? extends InputFormat> fileInputFormat,
       Class<?> fileOutputFormat, int bucketCount, List<String> bucketCols)
       throws HiveException {
+    createTable(tableName, columns, partCols, fileInputFormat, fileOutputFormat, bucketCount,
+        bucketCols, null);
+  }
+
+  /**
+   * Create a table metadata and the directory for the table data
+   * @param tableName table name
+   * @param columns list of fields of the table
+   * @param partCols partition keys of the table
+   * @param fileInputFormat Class of the input format of the table data file
+   * @param fileOutputFormat Class of the output format of the table data file
+   * @param bucketCount number of buckets that each partition (or the table itself) should be
+   *                    divided into
+   * @param bucketCols Bucket columns
+   * @param parameters Parameters for the table
+   * @throws HiveException
+   */
+  public void createTable(String tableName, List<String> columns, List<String> partCols,
+                          Class<? extends InputFormat> fileInputFormat,
+                          Class<?> fileOutputFormat, int bucketCount, List<String> bucketCols,
+                          Map<String, String> parameters) throws HiveException {
     if (columns == null) {
       throw new HiveException("columns not specified for table " + tableName);
     }
@@ -402,6 +423,9 @@ public class Hive {
     tbl.setSerializationLib(LazySimpleSerDe.class.getName());
     tbl.setNumBuckets(bucketCount);
     tbl.setBucketCols(bucketCols);
+    if (parameters != null) {
+      tbl.setParamters(parameters);
+    }
     createTable(tbl);
   }
 
@@ -427,9 +451,9 @@ public class Hive {
       newTbl.checkValidity();
       getMSC().alter_table(names[0], names[1], newTbl.getTTable());
     } catch (MetaException e) {
-      throw new HiveException("Unable to alter table.", e);
+      throw new HiveException("Unable to alter table. " + e.getMessage(), e);
     } catch (TException e) {
-      throw new HiveException("Unable to alter table.", e);
+      throw new HiveException("Unable to alter table. " + e.getMessage(), e);
     }
   }
 
@@ -455,9 +479,9 @@ public class Hive {
     try {
       getMSC().alter_index(dbName, baseTblName, idxName, newIdx);
     } catch (MetaException e) {
-      throw new HiveException("Unable to alter index.", e);
+      throw new HiveException("Unable to alter index. " + e.getMessage(), e);
     } catch (TException e) {
-      throw new HiveException("Unable to alter index.", e);
+      throw new HiveException("Unable to alter index. " + e.getMessage(), e);
     }
   }
 
@@ -502,9 +526,9 @@ public class Hive {
       getMSC().alter_partition(dbName, tblName, newPart.getTPartition());
 
     } catch (MetaException e) {
-      throw new HiveException("Unable to alter partition.", e);
+      throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
     } catch (TException e) {
-      throw new HiveException("Unable to alter partition.", e);
+      throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
     }
   }
 
@@ -534,9 +558,9 @@ public class Hive {
       }
       getMSC().alter_partitions(names[0], names[1], newTParts);
     } catch (MetaException e) {
-      throw new HiveException("Unable to alter partition.", e);
+      throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
     } catch (TException e) {
-      throw new HiveException("Unable to alter partition.", e);
+      throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
     }
   }
   /**
@@ -578,11 +602,11 @@ public class Hive {
           newPart.getTPartition());
 
     } catch (InvalidOperationException e){
-      throw new HiveException("Unable to rename partition.", e);
+      throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
     } catch (MetaException e) {
-      throw new HiveException("Unable to rename partition.", e);
+      throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
     } catch (TException e) {
-      throw new HiveException("Unable to rename partition.", e);
+      throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
     }
   }
 
@@ -591,11 +615,11 @@ public class Hive {
     try {
       getMSC().alterDatabase(dbName, db);
     } catch (MetaException e) {
-      throw new HiveException("Unable to alter database " + dbName, e);
+      throw new HiveException("Unable to alter database " + dbName + ". " + e.getMessage(), e);
     } catch (NoSuchObjectException e) {
       throw new HiveException("Database " + dbName + " does not exists.", e);
     } catch (TException e) {
-      throw new HiveException("Unable to alter database " + dbName, e);
+      throw new HiveException("Unable to alter database " + dbName + ". " + e.getMessage(), e);
     }
   }
   /**
@@ -870,14 +894,31 @@ public class Hive {
     try {
       return getMSC().dropIndex(db_name, tbl_name, index_name, deleteData);
     } catch (NoSuchObjectException e) {
-      throw new HiveException("Partition or table doesn't exist.", e);
+      throw new HiveException("Partition or table doesn't exist. " + e.getMessage(), e);
     } catch (Exception e) {
-      throw new HiveException("Unknown error. Please check logs.", e);
+      throw new HiveException(e.getMessage(), e);
     }
   }
 
   /**
    * Drops table along with the data in it. If the table doesn't exist then it
+   * is a no-op. If the ifPurge option is specified, it is passed to the
+   * HDFS command that removes the table data from the warehouse so that it
+   * skips the trash.
+   *
+   * @param tableName
+   *          table to drop
+   * @param ifPurge
+   *          completely purge the table (skipping trash) while removing data from warehouse
+   * @throws HiveException
+   *           thrown if the drop fails
+   */
+  public void dropTable(String tableName, boolean ifPurge) throws HiveException {
+    String[] names = Utilities.getDbTableName(tableName);
+    dropTable(names[0], names[1], true, true, ifPurge);
+  }
+
+  /**
+   * Drops table along with the data in it. If the table doesn't exist then it
    * is a no-op
    *
    * @param tableName
@@ -886,8 +927,7 @@ public class Hive {
    *           thrown if the drop fails
    */
   public void dropTable(String tableName) throws HiveException {
-    String[] names = Utilities.getDbTableName(tableName);
-    dropTable(names[0], names[1], true, true);
+    dropTable(tableName, false);
   }
 
   /**
@@ -902,7 +942,7 @@ public class Hive {
    *           thrown if the drop fails
    */
   public void dropTable(String dbName, String tableName) throws HiveException {
-    dropTable(dbName, tableName, true, true);
+    dropTable(dbName, tableName, true, true, false);
   }
 
   /**
@@ -913,14 +953,31 @@ public class Hive {
    * @param deleteData
    *          deletes the underlying data along with metadata
    * @param ignoreUnknownTab
-   *          an exception if thrown if this is falser and table doesn't exist
+   *          an exception is thrown if this is false and the table doesn't exist
    * @throws HiveException
    */
   public void dropTable(String dbName, String tableName, boolean deleteData,
       boolean ignoreUnknownTab) throws HiveException {
+    dropTable(dbName, tableName, deleteData, ignoreUnknownTab, false);
+  }
 
+  /**
+   * Drops the table.
+   *
+   * @param dbName
+   * @param tableName
+   * @param deleteData
+   *          deletes the underlying data along with metadata
+   * @param ignoreUnknownTab
+   *          an exception is thrown if this is false and the table doesn't exist
+   * @param ifPurge
+   *          completely purge the table skipping trash while removing data from warehouse
+   * @throws HiveException
+   */
+  public void dropTable(String dbName, String tableName, boolean deleteData,
+      boolean ignoreUnknownTab, boolean ifPurge) throws HiveException {
     try {
-      getMSC().dropTable(dbName, tableName, deleteData, ignoreUnknownTab);
+      getMSC().dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
     } catch (NoSuchObjectException e) {
       if (!ignoreUnknownTab) {
         throw new HiveException(e);
@@ -1008,7 +1065,7 @@ public class Hive {
       }
       return null;
     } catch (Exception e) {
-      throw new HiveException("Unable to fetch table " + tableName, e);
+      throw new HiveException("Unable to fetch table " + tableName + ". " + e.getMessage(), e);
     }
 
     // For non-views, we need to do some extra fixes
@@ -1204,6 +1261,15 @@ public class Hive {
     return getDatabase(currentDb);
   }
 
+  public void loadPartition(Path loadPath, String tableName,
+      Map<String, String> partSpec, boolean replace, boolean holdDDLTime,
+      boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
+      boolean isSrcLocal, boolean isAcid) throws HiveException {
+    Table tbl = getTable(tableName);
+    loadPartition(loadPath, tbl, partSpec, replace, holdDDLTime, inheritTableSpecs,
+        isSkewedStoreAsSubdir, isSrcLocal, isAcid);
+  }
+
   /**
    * Load a directory into a Hive Table Partition - Alters existing content of
    * the partition with the contents of loadPath. - If the partition does not
@@ -1212,7 +1278,7 @@ public class Hive {
    *
    * @param loadPath
    *          Directory containing files to load into Table
-   * @param tableName
+   * @param tbl
   *          table into which the partition is loaded
    * @param partSpec
    *          defines which partition needs to be loaded
@@ -1225,12 +1291,12 @@ public class Hive {
    * @param isSrcLocal
    *          If the source directory is LOCAL
    */
-  public void loadPartition(Path loadPath, String tableName,
+  public Partition loadPartition(Path loadPath, Table tbl,
       Map<String, String> partSpec, boolean replace, boolean holdDDLTime,
       boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
       boolean isSrcLocal, boolean isAcid) throws HiveException {
-    Table tbl = getTable(tableName);
     Path tblDataLocationPath =  tbl.getDataLocation();
+    Partition newTPart = null;
     try {
       /**
        * Move files before creating the partition since down stream processes
@@ -1279,10 +1345,10 @@ public class Hive {
         Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid);
       }
 
+      boolean forceCreate = !holdDDLTime;
+      newTPart = getPartition(tbl, partSpec, forceCreate, newPartPath.toString(), inheritTableSpecs);
       // recreate the partition if it existed before
       if (!holdDDLTime) {
-        Partition newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(),
-            inheritTableSpecs);
         if (isSkewedStoreAsSubdir) {
           org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
           SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
@@ -1292,9 +1358,9 @@ public class Hive {
           /* Add list bucketing location mappings. */
           skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
           newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
-          alterPartition(tbl.getTableName(), new Partition(tbl, newCreatedTpart));
+          alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newCreatedTpart));
           newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(), inheritTableSpecs);
-          newCreatedTpart = newTPart.getTPartition();
+          return new Partition(tbl, newCreatedTpart);
         }
       }
     } catch (IOException e) {
@@ -1307,7 +1373,7 @@ public class Hive {
       LOG.error(StringUtils.stringifyException(e));
       throw new HiveException(e);
     }
-
+    return newTPart;
   }
 
   /**
@@ -1403,18 +1469,18 @@ private void constructOneLBLocationMap(F
    * @param replace
    * @param numDP number of dynamic partitions
    * @param holdDDLTime
-   * @return a list of strings with the dynamic partition paths
+   * @return a map from each full partition spec to the created Partition
    * @throws HiveException
    */
-  public ArrayList<LinkedHashMap<String, String>> loadDynamicPartitions(Path loadPath,
+  public Map<Map<String, String>, Partition> loadDynamicPartitions(Path loadPath,
       String tableName, Map<String, String> partSpec, boolean replace,
       int numDP, boolean holdDDLTime, boolean listBucketingEnabled, boolean isAcid)
       throws HiveException {
 
     Set<Path> validPartitions = new HashSet<Path>();
     try {
-      ArrayList<LinkedHashMap<String, String>> fullPartSpecs =
-        new ArrayList<LinkedHashMap<String, String>>();
+      Map<Map<String, String>, Partition> partitionsMap =
+          new LinkedHashMap<Map<String, String>, Partition>();
 
       FileSystem fs = loadPath.getFileSystem(conf);
       FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP+1, fs);
@@ -1448,6 +1514,7 @@ private void constructOneLBLocationMap(F
             + " to at least " + validPartitions.size() + '.');
       }
 
+      Table tbl = getTable(tableName);
       // for each dynamically created DP directory, construct a full partition spec
       // and load the partition based on that
       Iterator<Path> iter = validPartitions.iterator();
@@ -1460,14 +1527,12 @@ private void constructOneLBLocationMap(F
         // generate a full partition specification
         LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>(partSpec);
         Warehouse.makeSpecFromName(fullPartSpec, partPath);
-        fullPartSpecs.add(fullPartSpec);
-
-        // finally load the partition -- move the file to the final table address
-        loadPartition(partPath, tableName, fullPartSpec, replace, holdDDLTime, true,
-            listBucketingEnabled, false, isAcid);
+        Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace,
+            holdDDLTime, true, listBucketingEnabled, false, isAcid);
+        partitionsMap.put(fullPartSpec, newPartition);
         LOG.info("New loading path = " + partPath + " with partSpec " + fullPartSpec);
       }
-      return fullPartSpecs;
+      return partitionsMap;
     } catch (IOException e) {
       throw new HiveException(e);
     }
@@ -1500,6 +1565,7 @@ private void constructOneLBLocationMap(F
       tbl.replaceFiles(loadPath, isSrcLocal);
     } else {
       tbl.copyFiles(loadPath, isSrcLocal, isAcid);
+      tbl.getParameters().put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, "true");
     }
 
     try {
@@ -1613,17 +1679,6 @@ private void constructOneLBLocationMap(F
     return getPartition(tbl, partSpec, forceCreate, null, true);
   }
 
-  private static void clearPartitionStats(org.apache.hadoop.hive.metastore.api.Partition tpart) {
-    Map<String,String> tpartParams = tpart.getParameters();
-    if (tpartParams == null) {
-      return;
-    }
-
-    for (String statType : StatsSetupConst.supportedStats) {
-      tpartParams.remove(statType);
-    }
-  }
-
   /**
    * Returns partition metadata
    *
@@ -1691,7 +1746,7 @@ private void constructOneLBLocationMap(F
             throw new HiveException("new partition path should not be null or empty.");
           }
           tpart.getSd().setLocation(partPath);
-          clearPartitionStats(tpart);
+          tpart.getParameters().put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, "true");
           String fullName = tbl.getTableName();
           if (!org.apache.commons.lang.StringUtils.isEmpty(tbl.getDbName())) {
             fullName = tbl.getDbName() + "." + tbl.getTableName();
@@ -1722,7 +1777,7 @@ private void constructOneLBLocationMap(F
     } catch (NoSuchObjectException e) {
       throw new HiveException("Partition or table doesn't exist.", e);
     } catch (Exception e) {
-      throw new HiveException("Unknown error. Please check logs.", e);
+      throw new HiveException(e.getMessage(), e);
     }
   }
 
@@ -1736,6 +1791,7 @@ private void constructOneLBLocationMap(F
   public List<Partition> dropPartitions(String dbName, String tblName,
       List<DropTableDesc.PartSpec> partSpecs,  boolean deleteData, boolean ignoreProtection,
       boolean ifExists) throws HiveException {
+    // TODO: add support for ifPurge
     try {
       Table tbl = getTable(dbName, tblName);
       List<ObjectPair<Integer, byte[]>> partExprs =
@@ -1750,7 +1806,7 @@ private void constructOneLBLocationMap(F
     } catch (NoSuchObjectException e) {
       throw new HiveException("Partition or table doesn't exist.", e);
     } catch (Exception e) {
-      throw new HiveException("Unknown error. Please check logs.", e);
+      throw new HiveException(e.getMessage(), e);
     }
   }
 
@@ -2243,7 +2299,7 @@ private void constructOneLBLocationMap(F
         result.add(srcToDest);
       }
     } catch (IOException e) {
-      throw new HiveException("checkPaths: filesystem error in check phase", e);
+      throw new HiveException("checkPaths: filesystem error in check phase. " + e.getMessage(), e);
     }
     return result;
   }
@@ -2310,7 +2366,7 @@ private void constructOneLBLocationMap(F
       try {
         ShimLoader.getHadoopShims().setFullFileStatus(conf, destStatus, fs, destf);
       } catch (IOException e) {
-        LOG.warn("Error setting permission of file " + destf + ": "+ StringUtils.stringifyException(e));
+        LOG.warn("Error setting permission of file " + destf + ": "+ e.getMessage(), e);
       }
     }
     return success;
@@ -2349,7 +2405,7 @@ private void constructOneLBLocationMap(F
       srcs = srcFs.globStatus(srcf);
     } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
-      throw new HiveException("addFiles: filesystem error in check phase", e);
+      throw new HiveException("addFiles: filesystem error in check phase. " + e.getMessage(), e);
     }
     if (srcs == null) {
       LOG.info("No sources specified to move: " + srcf);
@@ -2375,7 +2431,7 @@ private void constructOneLBLocationMap(F
           }
         }
       } catch (IOException e) {
-        throw new HiveException("copyFiles: error while moving files!!!", e);
+        throw new HiveException("copyFiles: error while moving files!!! " + e.getMessage(), e);
       }
     }
   }
@@ -2447,7 +2503,7 @@ private void constructOneLBLocationMap(F
               fs.rename(bucketSrc, bucketDest);
             }
           } catch (IOException e) {
-            throw new HiveException("Error moving acid files", e);
+            throw new HiveException("Error moving acid files " + e.getMessage(), e);
           }
         }
       }
@@ -2679,7 +2735,7 @@ private void constructOneLBLocationMap(F
       throw new HiveException(e);
     }
   }
-  
+
   public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException {
     try {
       return getMSC().setPartitionColumnStatistics(request);

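For context on the API change above: loadDynamicPartitions now hands back the created Partition objects keyed by their full partition specs, instead of a list of spec maps. A minimal caller sketch, assuming illustrative table and path names; the parameter list is copied from the new signature:

    import java.util.LinkedHashMap;
    import java.util.Map;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.metadata.Hive;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.metadata.Partition;

    public class LoadDynamicPartitionsSketch {
      public static void main(String[] args) throws HiveException {
        Hive db = Hive.get(new HiveConf());
        // Static part of the partition spec; the dynamic columns are derived
        // from directory names under loadPath (numDP levels deep).
        Map<String, String> partSpec = new LinkedHashMap<String, String>();
        partSpec.put("ds", "2014-10-14");

        Map<Map<String, String>, Partition> loaded = db.loadDynamicPartitions(
            new Path("/tmp/staging"),   // loadPath (illustrative)
            "target_table",             // tableName (illustrative)
            partSpec,
            true,                       // replace
            1,                          // numDP
            false,                      // holdDDLTime
            false,                      // listBucketingEnabled
            false);                     // isAcid
        // Callers (e.g. stats collection) can now act on the Partition
        // objects directly instead of re-fetching them by spec.
        for (Map.Entry<Map<String, String>, Partition> e : loaded.entrySet()) {
          System.out.println(e.getKey() + " -> " + e.getValue().getName());
        }
      }
    }
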
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java Tue Oct 14 19:06:45 2014
@@ -451,7 +451,11 @@ public class SessionHiveMetaStoreClient 
     // Delete table data
     if (deleteData && !MetaStoreUtils.isExternalTable(table)) {
       try {
-        getWh().deleteDir(tablePath, true);
+        boolean ifPurge = false;
+        if (envContext != null && envContext.getProperties() != null) {
+          ifPurge = Boolean.parseBoolean(envContext.getProperties().get("ifPurge"));
+        }
+        getWh().deleteDir(tablePath, true, ifPurge);
       } catch (Exception err) {
         LOG.error("Failed to delete temp table directory: " + tablePath, err);
         // Forgive error

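A minimal sketch of the ifPurge handshake above, assuming only the thrift-generated EnvironmentContext class; the "ifPurge" key comes from the hunk, everything else is illustrative:

    import org.apache.hadoop.hive.metastore.api.EnvironmentContext;

    public class IfPurgeSketch {
      public static void main(String[] args) {
        // Drop-side callers opt out of the trash by setting ifPurge in the
        // environment context that travels with the drop request.
        EnvironmentContext envContext = new EnvironmentContext();
        envContext.putToProperties("ifPurge", "true");

        // Mirrors the read side in SessionHiveMetaStoreClient above:
        // a missing context or property falls back to false.
        boolean ifPurge = false;
        if (envContext != null && envContext.getProperties() != null) {
          ifPurge = Boolean.parseBoolean(envContext.getProperties().get("ifPurge"));
        }
        System.out.println("ifPurge = " + ifPurge);
      }
    }
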
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Tue Oct 14 19:06:45 2014
@@ -385,6 +385,10 @@ public class Table implements Serializab
     tTable.getParameters().put(name, value);
   }
 
+  public void setParameters(Map<String, String> params) {
+    tTable.setParameters(params);
+  }
+
   public String getProperty(String name) {
     return tTable.getParameters().get(name);
   }

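A small usage sketch for the new bulk setter above (spelled setParameters, as corrected). The Table constructor arguments are illustrative:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.hive.ql.metadata.Table;

    public class TableParametersSketch {
      public static void main(String[] args) {
        Table tbl = new Table("default", "scratch");
        Map<String, String> params = new HashMap<String, String>();
        params.put("comment", "bulk-set example");
        // Replaces the entire parameter map in one call, unlike
        // setProperty(name, value), which sets a single entry.
        tbl.setParameters(params);
        System.out.println(tbl.getProperty("comment"));
      }
    }
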
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java Tue Oct 14 19:06:45 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
 import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.PTFOperator;
@@ -76,6 +77,7 @@ public class ColumnPruner implements Tra
    * @param pactx
    *          the current parse context
    */
+  @Override
   public ParseContext transform(ParseContext pactx) throws SemanticException {
     pGraphContext = pactx;
     opToParseCtxMap = pGraphContext.getOpParseCtx();
@@ -120,6 +122,9 @@ public class ColumnPruner implements Tra
     opRules.put(new RuleRegExp("R11",
         ScriptOperator.getOperatorName() + "%"),
         ColumnPrunerProcFactory.getScriptProc());
+    opRules.put(new RuleRegExp("R12",
+        LimitOperator.getOperatorName() + "%"),
+        ColumnPrunerProcFactory.getLimitProc());
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory

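The new R12 entry reuses the dispatch idiom noted in the comment above: each rule pairs an operator-name pattern with a processor, and the dispatcher fires the closest match while walking the plan. A plain-Java toy of that dispatch (the operator name "LIM" for LimitOperator is assumed from Hive's operator naming):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class RuleDispatchToy {
      interface NodeProcessor {
        void process(String operatorId);
      }

      public static void main(String[] args) {
        // Mirrors: opRules.put(new RuleRegExp("R12",
        //     LimitOperator.getOperatorName() + "%"), getLimitProc());
        Map<String, NodeProcessor> opRules = new LinkedHashMap<String, NodeProcessor>();
        opRules.put("LIM%", new NodeProcessor() {
          public void process(String operatorId) {
            System.out.println("column pruning fired at " + operatorId);
          }
        });

        String visited = "LIM_3"; // a LimitOperator instance in the plan
        for (Map.Entry<String, NodeProcessor> rule : opRules.entrySet()) {
          String prefix = rule.getKey().substring(0, rule.getKey().length() - 1);
          if (visited.startsWith(prefix)) {
            rule.getValue().process(visited); // dispatcher fires the matching processor
          }
        }
      }
    }
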
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Tue Oct 14 19:06:45 2014
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Gr
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
 import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
@@ -215,6 +216,24 @@ public final class ColumnPrunerProcFacto
     }
   }
 
+  public static class ColumnPrunerLimitProc extends ColumnPrunerDefaultProc {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+      super.process(nd, stack, ctx, nodeOutputs);
+      List<String> cols = ((ColumnPrunerProcCtx)ctx).getPrunedColLists().get(nd);
+      if (null != cols) {
+        pruneOperator(ctx, (LimitOperator) nd, cols);
+      }
+      return null;
+    }
+  }
+
+  public static ColumnPrunerLimitProc getLimitProc() {
+    return new ColumnPrunerLimitProc();
+  }
+
   public static ColumnPrunerScriptProc getScriptProc() {
     return new ColumnPrunerScriptProc();
   }

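What the new processor buys, reduced to a toy: after the default pass records which columns are still needed, pruneOperator cuts the limit operator's schema down to that list, so LIMIT stops forwarding unused columns. The column names below are illustrative internal names:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class LimitPruneToy {
      public static void main(String[] args) {
        // The limit operator's output schema before pruning.
        List<String> schema = new ArrayList<String>(Arrays.asList("_col0", "_col1", "_col2"));
        // What getPrunedColLists().get(nd) reports as actually needed.
        List<String> neededCols = Arrays.asList("_col0", "_col2");
        schema.retainAll(neededCols); // the effect of pruneOperator(ctx, op, cols)
        System.out.println(schema);   // prints [_col0, _col2]
      }
    }
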
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java Tue Oct 14 19:06:45 2014
@@ -29,6 +29,7 @@ import java.util.Stack;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
@@ -670,10 +671,30 @@ public final class ConstantPropagateProc
       cppCtx.getOpToConstantExprs().put(op, constants);
       foldOperator(op, cppCtx);
       List<ExprNodeDesc> colList = op.getConf().getColList();
+      List<String> columnNames = op.getConf().getOutputColumnNames();
+      Map<String, ExprNodeDesc> columnExprMap = op.getColumnExprMap();
       if (colList != null) {
         for (int i = 0; i < colList.size(); i++) {
           ExprNodeDesc newCol = foldExpr(colList.get(i), constants, cppCtx, op, 0, false);
+          if (!(colList.get(i) instanceof ExprNodeConstantDesc) && newCol instanceof ExprNodeConstantDesc) {
+            // Let's try to store the original column name if this column got folded.
+            // This is useful for optimizations like GroupByOptimizer
+            String colName = colList.get(i).getExprString();
+            if (HiveConf.getPositionFromInternalName(colName) == -1) {
+              // If it's not an internal name, this is what we want.
+              ((ExprNodeConstantDesc)newCol).setFoldedFromCol(colName);
+            } else {
+              // If it was an internal column, try to resolve the original name through columnExprMap.
+              ExprNodeDesc desc = (columnExprMap == null) ? null : columnExprMap.get(colName);
+              if (desc instanceof ExprNodeConstantDesc) {
+                ((ExprNodeConstantDesc)newCol).setFoldedFromCol(((ExprNodeConstantDesc)desc).getFoldedFromCol());
+              }
+            }
+          }
           colList.set(i, newCol);
+          if (columnExprMap != null) {
+            columnExprMap.put(columnNames.get(i), newCol);
+          }
         }
         LOG.debug("New column list:(" + StringUtils.join(colList, " ") + ")");
       }