Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/12 19:23:14 UTC

svn commit: r1541190 [5/15] - in /hive/branches/tez: ./ ant/src/org/apache/hadoop/hive/ant/ beeline/ beeline/src/java/org/apache/hive/beeline/ cli/ cli/src/java/org/apache/hadoop/hive/cli/ common/ common/src/java/org/apache/hadoop/hive/common/ common/s...

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Nov 12 18:23:05 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@@ -54,6 +55,8 @@ final class ReaderImpl implements Reader
   private final CompressionKind compressionKind;
   private final CompressionCodec codec;
   private final int bufferSize;
+  private OrcProto.Metadata metadata = null;
+  private final int metadataSize;
   private final OrcProto.Footer footer;
   private final ObjectInspector inspector;
   private long deserializedSize = -1;
@@ -294,12 +297,14 @@ final class ReaderImpl implements Reader
     FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fs, path);
 
     MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(footerMetaData.compressionType,
-        footerMetaData.bufferSize, footerMetaData.footerBuffer);
+        footerMetaData.bufferSize, footerMetaData.metadataSize, footerMetaData.footerBuffer);
 
     this.footerByteBuffer = footerMetaData.footerBuffer;
     this.compressionKind = rInfo.compressionKind;
     this.codec = rInfo.codec;
     this.bufferSize = rInfo.bufferSize;
+    this.metadataSize = rInfo.metadataSize;
+    this.metadata = rInfo.metadata;
     this.footer = rInfo.footer;
     this.inspector = rInfo.inspector;
   }
@@ -321,12 +326,15 @@ final class ReaderImpl implements Reader
     MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(
             fMetaInfo.compressionType,
             fMetaInfo.bufferSize,
+            fMetaInfo.metadataSize,
             fMetaInfo.footerBuffer
             );
     this.footerByteBuffer = fMetaInfo.footerBuffer;
     this.compressionKind = rInfo.compressionKind;
     this.codec = rInfo.codec;
     this.bufferSize = rInfo.bufferSize;
+    this.metadataSize = rInfo.metadataSize;
+    this.metadata = rInfo.metadata;
     this.footer = rInfo.footer;
     this.inspector = rInfo.inspector;
   }
@@ -354,6 +362,9 @@ final class ReaderImpl implements Reader
 
     checkOrcVersion(LOG, path, ps.getVersionList());
 
+    int footerSize = (int) ps.getFooterLength();
+    int metadataSize = (int) ps.getMetadataLength();
+
     //check compression codec
     switch (ps.getCompression()) {
       case NONE:
@@ -368,11 +379,8 @@ final class ReaderImpl implements Reader
         throw new IllegalArgumentException("Unknown compression");
     }
 
-    //get footer size
-    int footerSize = (int) ps.getFooterLength();
-
     //check if extra bytes need to be read
-    int extra = Math.max(0, psLen + 1 + footerSize - readSize);
+    int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
     if (extra > 0) {
       //more bytes need to be read, seek back to the right place and read extra bytes
       file.seek(size - readSize - extra);
@@ -384,22 +392,24 @@ final class ReaderImpl implements Reader
       extraBuf.put(buffer);
       buffer = extraBuf;
       buffer.position(0);
-      buffer.limit(footerSize);
+      buffer.limit(footerSize + metadataSize);
     } else {
       //footer is already in the bytes in buffer, just adjust position, length
-      buffer.position(psOffset - footerSize);
+      buffer.position(psOffset - footerSize - metadataSize);
       buffer.limit(psOffset);
     }
-    file.close();
-    //mark this position so that we can call reset() to get back to it
+
+    // remember position for later
     buffer.mark();
 
+    file.close();
+
     return new FileMetaInfo(
         ps.getCompression().toString(),
         (int) ps.getCompressionBlockSize(),
+        (int) ps.getMetadataLength(),
         buffer
         );
-
   }
 
 
@@ -415,23 +425,40 @@ final class ReaderImpl implements Reader
     final CompressionKind compressionKind;
     final CompressionCodec codec;
     final int bufferSize;
+    final int metadataSize;
+    final OrcProto.Metadata metadata;
     final OrcProto.Footer footer;
     final ObjectInspector inspector;
 
-    MetaInfoObjExtractor(String codecStr, int bufferSize, ByteBuffer footerBuffer) throws IOException {
+    MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, 
+        ByteBuffer footerBuffer) throws IOException {
+
       this.compressionKind = CompressionKind.valueOf(codecStr);
       this.bufferSize = bufferSize;
       this.codec = WriterImpl.createCodec(compressionKind);
-      int footerBufferSize = footerBuffer.limit() - footerBuffer.position();
-      InputStream instream = InStream.create("footer", new ByteBuffer[]{footerBuffer},
+      this.metadataSize = metadataSize;
+
+      int position = footerBuffer.position();
+      int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
+      footerBuffer.limit(position + metadataSize);
+
+      InputStream instream = InStream.create("metadata", new ByteBuffer[]{footerBuffer},
+          new long[]{0L}, metadataSize, codec, bufferSize);
+      this.metadata = OrcProto.Metadata.parseFrom(instream);
+
+      footerBuffer.position(position + metadataSize);
+      footerBuffer.limit(position + metadataSize + footerBufferSize);
+      instream = InStream.create("footer", new ByteBuffer[]{footerBuffer},
           new long[]{0L}, footerBufferSize, codec, bufferSize);
       this.footer = OrcProto.Footer.parseFrom(instream);
+
+      footerBuffer.position(position);
       this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
     }
   }
 
   public FileMetaInfo getFileMetaInfo(){
-    return new FileMetaInfo(compressionKind.toString(), bufferSize, footerByteBuffer);
+    return new FileMetaInfo(compressionKind.toString(), bufferSize, metadataSize, footerByteBuffer);
   }
 
 
@@ -451,6 +478,13 @@ final class ReaderImpl implements Reader
   public RecordReader rows(long offset, long length, boolean[] include,
                            SearchArgument sarg, String[] columnNames
                            ) throws IOException {
+
+    // if included columns is null, then include all columns
+    if (include == null) {
+      include = new boolean[footer.getTypesCount()];
+      Arrays.fill(include, true);
+    }
+
     return new RecordReaderImpl(this.getStripes(), fileSystem,  path, offset,
         length, footer.getTypesList(), codec, bufferSize,
         include, footer.getRowIndexStride(), sarg, columnNames);
@@ -576,4 +610,9 @@ final class ReaderImpl implements Reader
     return Collections.max(indices);
   }
 
+  @Override
+  public Metadata getMetadata() throws IOException {
+    return new Metadata(metadata);
+  }
+
 }
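
[Context note] On the read side, extractMetaInfoFromFooter() now pulls both the new metadata section and the footer into the tail buffer (the PostScript's metadataLength tells it where one ends and the other begins), MetaInfoObjExtractor parses both protobuf messages, and rows() fills in an all-true include array when none is supplied. The parsed stripe-level statistics are exposed through the new getMetadata() method. A minimal usage sketch follows, assuming only the OrcFile.createReader() factory and the Metadata wrapper referenced in this commit; the file path and harness class name are made up:

    // Sketch only: open an ORC file and read the new stripe-level statistics
    // section. Assumes Reader.getMetadata() and the Metadata wrapper added in
    // this commit; "/tmp/example.orc" is a hypothetical path.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.Metadata;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;

    public class OrcStripeStatsExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Reader reader = OrcFile.createReader(fs, new Path("/tmp/example.orc"));
        // Metadata wraps the OrcProto.Metadata message, i.e. one
        // StripeStatistics entry per stripe of the file.
        Metadata metadata = reader.getMetadata();
        System.out.println("stripe statistics loaded: " + metadata);
      }
    }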

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Nov 12 18:23:05 2013
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -64,6 +65,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
+import com.google.common.collect.Lists;
 import com.google.protobuf.ByteString;
 import com.google.protobuf.CodedOutputStream;
 
@@ -125,7 +127,6 @@ class WriterImpl implements Writer, Memo
   private final boolean buildIndex;
   private final MemoryManager memoryManager;
   private final OrcFile.Version version;
-
   private final Configuration conf;
 
   WriterImpl(FileSystem fs,
@@ -411,6 +412,7 @@ class WriterImpl implements Writer, Memo
     private final BitFieldWriter isPresent;
     private final boolean isCompressed;
     protected final ColumnStatisticsImpl indexStatistics;
+    protected final ColumnStatisticsImpl stripeColStatistics;
     private final ColumnStatisticsImpl fileStatistics;
     protected TreeWriter[] childrenWriters;
     protected final RowIndexPositionRecorder rowIndexPosition;
@@ -419,7 +421,7 @@ class WriterImpl implements Writer, Memo
     private final PositionedOutputStream rowIndexStream;
     private boolean foundNulls;
     private OutStream isPresentOutStream;
-    protected final boolean useDirectV2Encoding;
+    private final List<StripeStatistics.Builder> stripeStatsBuilders;
 
     /**
      * Create a tree writer.
@@ -435,7 +437,6 @@ class WriterImpl implements Writer, Memo
       this.isCompressed = streamFactory.isCompressed();
       this.id = columnId;
       this.inspector = inspector;
-      this.useDirectV2Encoding = true;
       if (nullable) {
         isPresentOutStream = streamFactory.createStream(id,
             OrcProto.Stream.Kind.PRESENT);
@@ -445,11 +446,13 @@ class WriterImpl implements Writer, Memo
       }
       this.foundNulls = false;
       indexStatistics = ColumnStatisticsImpl.create(inspector);
+      stripeColStatistics = ColumnStatisticsImpl.create(inspector);
       fileStatistics = ColumnStatisticsImpl.create(inspector);
       childrenWriters = new TreeWriter[0];
       rowIndex = OrcProto.RowIndex.newBuilder();
       rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
       rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
+      stripeStatsBuilders = Lists.newArrayList();
       if (streamFactory.buildIndex()) {
         rowIndexStream = streamFactory.createStream(id,
             OrcProto.Stream.Kind.ROW_INDEX);
@@ -462,6 +465,10 @@ class WriterImpl implements Writer, Memo
       return rowIndex;
     }
 
+    protected ColumnStatisticsImpl getStripeStatistics() {
+      return stripeColStatistics;
+    }
+
     protected ColumnStatisticsImpl getFileStatistics() {
       return fileStatistics;
     }
@@ -537,6 +544,12 @@ class WriterImpl implements Writer, Memo
         }
       }
 
+      // merge stripe-level column statistics to file statistics and write it to
+      // stripe statistics
+      OrcProto.StripeStatistics.Builder stripeStatsBuilder = OrcProto.StripeStatistics.newBuilder();
+      writeStripeStatistics(stripeStatsBuilder, this);
+      stripeStatsBuilders.add(stripeStatsBuilder);
+
       // reset the flag for next stripe
       foundNulls = false;
 
@@ -554,6 +567,16 @@ class WriterImpl implements Writer, Memo
       rowIndexEntry.clear();
     }
 
+    private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
+        TreeWriter treeWriter) {
+      treeWriter.fileStatistics.merge(treeWriter.stripeColStatistics);
+      builder.addColStats(treeWriter.stripeColStatistics.serialize().build());
+      treeWriter.stripeColStatistics.reset();
+      for (TreeWriter child : treeWriter.getChildrenWriters()) {
+        writeStripeStatistics(builder, child);
+      }
+    }
+
     TreeWriter[] getChildrenWriters() {
       return childrenWriters;
     }
@@ -575,7 +598,7 @@ class WriterImpl implements Writer, Memo
      * @throws IOException
      */
     void createRowIndexEntry() throws IOException {
-      fileStatistics.merge(indexStatistics);
+      stripeColStatistics.merge(indexStatistics);
       rowIndexEntry.setStatistics(indexStatistics.serialize());
       indexStatistics.reset();
       rowIndex.addEntry(rowIndexEntry);
@@ -1013,7 +1036,7 @@ class WriterImpl implements Writer, Memo
      */
     @Override
     void createRowIndexEntry() throws IOException {
-      getFileStatistics().merge(indexStatistics);
+      getStripeStatistics().merge(indexStatistics);
       OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
       rowIndexEntry.setStatistics(indexStatistics.serialize());
       indexStatistics.reset();
@@ -1851,6 +1874,21 @@ class WriterImpl implements Writer, Memo
     }
   }
 
+  private int writeMetadata(long bodyLength) throws IOException {
+    ensureWriter();
+    OrcProto.Metadata.Builder builder = OrcProto.Metadata.newBuilder();
+    for(OrcProto.StripeStatistics.Builder ssb : treeWriter.stripeStatsBuilders) {
+      builder.addStripeStats(ssb.build());
+    }
+
+    long startPosn = rawWriter.getPos();
+    OrcProto.Metadata metadata = builder.build();
+    metadata.writeTo(protobufWriter);
+    protobufWriter.flush();
+    writer.flush();
+    return (int) (rawWriter.getPos() - startPosn);
+  }
+
   private int writeFooter(long bodyLength) throws IOException {
     ensureWriter();
     OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
@@ -1881,11 +1919,12 @@ class WriterImpl implements Writer, Memo
     return (int) (rawWriter.getPos() - startPosn);
   }
 
-  private int writePostScript(int footerLength) throws IOException {
+  private int writePostScript(int footerLength, int metadataLength) throws IOException {
     OrcProto.PostScript.Builder builder =
       OrcProto.PostScript.newBuilder()
         .setCompression(writeCompressionKind(compress))
         .setFooterLength(footerLength)
+        .setMetadataLength(metadataLength)
         .setMagic(OrcFile.MAGIC)
         .addVersion(version.getMajor())
         .addVersion(version.getMinor());
@@ -1940,8 +1979,9 @@ class WriterImpl implements Writer, Memo
     // actually close the file
     synchronized (this) {
       flushStripe();
-      int footerLength = writeFooter(rawWriter.getPos());
-      rawWriter.writeByte(writePostScript(footerLength));
+      int metadataLength = writeMetadata(rawWriter.getPos());
+      int footerLength = writeFooter(rawWriter.getPos() - metadataLength);
+      rawWriter.writeByte(writePostScript(footerLength, metadataLength));
       rawWriter.close();
     }
   }
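
[Context note] With these changes the writer accumulates per-stripe column statistics (one OrcProto.StripeStatistics builder per stripe in stripeStatsBuilders) and serializes them into a new metadata section written just before the footer. Reading close() above, the file tail now looks roughly like this (a sketch of the layout, not normative documentation; the lengths are the values recorded in the PostScript):

    ... stripe data ... | metadata section | footer | PostScript | 1 byte = PostScript length
                          metadataLength     footerLength

Note that writeFooter() is passed rawWriter.getPos() - metadataLength, presumably so that the body length recorded in the footer continues to cover only the stripe data and excludes the new metadata section.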

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java Tue Nov 12 18:23:05 2013
@@ -91,8 +91,7 @@ public class RCFileMergeMapper extends M
     updatePaths(tmpPath, taskTmpPath);
     try {
       fs = (new Path(specPath)).getFileSystem(job);
-      autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs,
-          outPath);
+      autoDelete = fs.deleteOnExit(outPath);
     } catch (IOException e) {
       this.exception = true;
       throw new RuntimeException(e);

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java Tue Nov 12 18:23:05 2013
@@ -84,8 +84,7 @@ public class ColumnTruncateMapper extend
     updatePaths(tmpPath, taskTmpPath);
     try {
       fs = (new Path(specPath)).getFileSystem(job);
-      autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs,
-          outPath);
+      autoDelete = fs.deleteOnExit(outPath);
     } catch (IOException e) {
       this.exception = true;
       throw new RuntimeException(e);

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Nov 12 18:23:05 2013
@@ -1567,8 +1567,8 @@ private void constructOneLBLocationMap(F
     if (tpartParams == null) {
       return;
     }
-    List<String> statTypes = StatsSetupConst.getSupportedStats();
-    for (String statType : statTypes) {
+
+    for (String statType : StatsSetupConst.supportedStats) {
       tpartParams.remove(statType);
     }
   }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java Tue Nov 12 18:23:05 2013
@@ -260,7 +260,7 @@ public class JsonMetaDataFormatter imple
       // in case all files in locations do not exist
       try {
         FileStatus tmpStatus = fs.getFileStatus(tblPath);
-        lastAccessTime = ShimLoader.getHadoopShims().getAccessTime(tmpStatus);
+        lastAccessTime = tmpStatus.getAccessTime();
         lastUpdateTime = tmpStatus.getModificationTime();
       } catch (IOException e) {
         LOG.warn(
@@ -273,7 +273,7 @@ public class JsonMetaDataFormatter imple
           try {
             FileStatus status = fs.getFileStatus(tblPath);
             FileStatus[] files = fs.listStatus(loc);
-            long accessTime = ShimLoader.getHadoopShims().getAccessTime(status);
+            long accessTime = status.getAccessTime();
             long updateTime = status.getModificationTime();
             // no matter loc is the table location or part location, it must be a
             // directory.
@@ -299,8 +299,7 @@ public class JsonMetaDataFormatter imple
               if (fileLen < minFileSize) {
                 minFileSize = fileLen;
               }
-              accessTime = ShimLoader.getHadoopShims().getAccessTime(
-                  currentStatus);
+              accessTime = currentStatus.getAccessTime();
               updateTime = currentStatus.getModificationTime();
               if (accessTime > lastAccessTime) {
                 lastAccessTime = accessTime;

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java Tue Nov 12 18:23:05 2013
@@ -278,7 +278,7 @@ class TextMetaDataFormatter implements M
       // in case all files in locations do not exist
       try {
         FileStatus tmpStatus = fs.getFileStatus(tblPath);
-        lastAccessTime = ShimLoader.getHadoopShims().getAccessTime(tmpStatus);
+        lastAccessTime = tmpStatus.getAccessTime();
         lastUpdateTime = tmpStatus.getModificationTime();
         if (partSpecified) {
           // check whether the part exists or not in fs
@@ -295,7 +295,7 @@ class TextMetaDataFormatter implements M
           try {
             FileStatus status = fs.getFileStatus(tblPath);
             FileStatus[] files = fs.listStatus(loc);
-            long accessTime = ShimLoader.getHadoopShims().getAccessTime(status);
+            long accessTime = status.getAccessTime();
             long updateTime = status.getModificationTime();
             // no matter loc is the table location or part location, it must be a
             // directory.
@@ -321,8 +321,7 @@ class TextMetaDataFormatter implements M
               if (fileLen < minFileSize) {
                 minFileSize = fileLen;
               }
-              accessTime = ShimLoader.getHadoopShims().getAccessTime(
-                  currentStatus);
+              accessTime = currentStatus.getAccessTime();
               updateTime = currentStatus.getModificationTime();
               if (accessTime > lastAccessTime) {
                 lastAccessTime = accessTime;

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java Tue Nov 12 18:23:05 2013
@@ -122,11 +122,6 @@ public class LimitPushdownOptimizer impl
         }
       }
       if (rs != null) {
-        List<List<Integer>> distincts = rs.getConf().getDistinctColumnIndices();
-        if (distincts != null && distincts.size() > 1) {
-          // multi distinct case. can not sure that it's safe just by multiplying limit value
-          return false;
-        }
         LimitOperator limit = (LimitOperator) nd;
         rs.getConf().setTopN(limit.getConf().getLimit());
         rs.getConf().setTopNMemoryUsage(((LimitPushdownContext) procCtx).threshold);

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java Tue Nov 12 18:23:05 2013
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.optimizer;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
@@ -25,6 +26,8 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -37,7 +40,11 @@ import org.apache.hadoop.hive.ql.exec.Ta
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.metadata.InputEstimator;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -53,6 +60,8 @@ import org.apache.hadoop.hive.ql.plan.Op
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
 
 /**
  * Tries to convert simple fetch query to single fetch task, which fetches rows directly
@@ -75,7 +84,7 @@ public class SimpleFetchOptimizer implem
           if (fetchTask != null) {
             pctx.setFetchTask(fetchTask);
           }
-        } catch (HiveException e) {
+        } catch (Exception e) {
           // Has to use full name to make sure it does not conflict with
           // org.apache.commons.lang.StringUtils
           LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
@@ -92,13 +101,13 @@ public class SimpleFetchOptimizer implem
   // returns non-null FetchTask instance when succeeded
   @SuppressWarnings("unchecked")
   private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator source)
-      throws HiveException {
+      throws Exception {
     String mode = HiveConf.getVar(
         pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);
 
     boolean aggressive = "more".equals(mode);
     FetchData fetch = checkTree(aggressive, pctx, alias, source);
-    if (fetch != null) {
+    if (fetch != null && checkThreshold(fetch, pctx)) {
       int limit = pctx.getQB().getParseInfo().getOuterQueryLimit();
       FetchWork fetchWork = fetch.convertToWork();
       FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf());
@@ -110,6 +119,21 @@ public class SimpleFetchOptimizer implem
     return null;
   }
 
+  private boolean checkThreshold(FetchData data, ParseContext pctx) throws Exception {
+    long threshold = HiveConf.getLongVar(pctx.getConf(),
+        HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
+    if (threshold < 0) {
+      return true;
+    }
+    long remaining = threshold;
+    remaining -= data.getInputLength(pctx, remaining);
+    if (remaining < 0) {
+      LOG.info("Threshold " + remaining + " exceeded for pseudoMR mode");
+      return false;
+    }
+    return true;
+  }
+
   // all we can handle is LimitOperator, FilterOperator SelectOperator and final FS
   //
   // for non-aggressive mode (minimal)
@@ -144,7 +168,8 @@ public class SimpleFetchOptimizer implem
       PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts);
       if (aggressive || !pruned.hasUnknownPartitions()) {
         bypassFilter &= !pruned.hasUnknownPartitions();
-        return checkOperators(new FetchData(pruned, splitSample), ts, aggressive, bypassFilter);
+        return checkOperators(new FetchData(table, pruned, splitSample), ts,
+            aggressive, bypassFilter);
       }
     }
     return null;
@@ -171,6 +196,7 @@ public class SimpleFetchOptimizer implem
       }
     }
     if (op instanceof FileSinkOperator) {
+      fetch.scanOp = ts;
       fetch.fileSink = op;
       return fetch;
     }
@@ -184,6 +210,9 @@ public class SimpleFetchOptimizer implem
     private final PrunedPartitionList partsList;
     private final HashSet<ReadEntity> inputs = new HashSet<ReadEntity>();
 
+    // source table scan
+    private TableScanOperator scanOp;
+
     // this is always non-null when conversion is completed
     private Operator<?> fileSink;
 
@@ -193,15 +222,15 @@ public class SimpleFetchOptimizer implem
       this.splitSample = splitSample;
     }
 
-    private FetchData(PrunedPartitionList partsList, SplitSample splitSample) {
-      this.table = null;
+    private FetchData(Table table, PrunedPartitionList partsList, SplitSample splitSample) {
+      this.table = table;
       this.partsList = partsList;
       this.splitSample = splitSample;
     }
 
     private FetchWork convertToWork() throws HiveException {
       inputs.clear();
-      if (table != null) {
+      if (!table.isPartitioned()) {
         inputs.add(new ReadEntity(table));
         String path = table.getPath().toString();
         FetchWork work = new FetchWork(path, Utilities.getTableDesc(table));
@@ -236,6 +265,56 @@ public class SimpleFetchOptimizer implem
       pctx.getSemanticInputs().addAll(inputs);
       return replaceFSwithLS(fileSink, work.getSerializationNullFormat());
     }
+
+    private long getInputLength(ParseContext pctx, long remaining) throws Exception {
+      if (splitSample != null && splitSample.getTotalLength() != null) {
+        return splitSample.getTotalLength();
+      }
+      long length = calculateLength(pctx, remaining);
+      if (splitSample != null) {
+        return splitSample.getTargetSize(length);
+      }
+      return length;
+    }
+
+    private long calculateLength(ParseContext pctx, long remaining) throws Exception {
+      JobConf jobConf = new JobConf(pctx.getConf());
+      Utilities.setColumnNameList(jobConf, scanOp, true);
+      Utilities.setColumnTypeList(jobConf, scanOp, true);
+      HiveStorageHandler handler = table.getStorageHandler();
+      if (handler instanceof InputEstimator) {
+        InputEstimator estimator = (InputEstimator) handler;
+        TableDesc tableDesc = Utilities.getTableDesc(table);
+        PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
+        Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
+        return estimator.estimate(jobConf, scanOp, remaining).getTotalLength();
+      }
+      if (table.isNonNative()) {
+        return 0; // nothing can be done
+      }
+      if (!table.isPartitioned()) {
+        return getFileLength(jobConf, table.getPath(), table.getInputFormatClass());
+      }
+      long total = 0;
+      for (Partition partition : partsList.getNotDeniedPartns()) {
+        Path path = partition.getPartitionPath();
+        total += getFileLength(jobConf, path, partition.getInputFormatClass());
+      }
+      return total;
+    }
+
+    // from Utilities.getInputSummary()
+    private long getFileLength(JobConf conf, Path path, Class<? extends InputFormat> clazz)
+        throws IOException {
+      ContentSummary summary;
+      if (ContentSummaryInputFormat.class.isAssignableFrom(clazz)) {
+        InputFormat input = HiveInputFormat.getInputFormatFromCache(clazz, conf);
+        summary = ((ContentSummaryInputFormat)input).getContentSummary(path, conf);
+      } else {
+        summary = path.getFileSystem(conf).getContentSummary(path);
+      }
+      return summary.getLength();
+    }
   }
 
   public static ListSinkOperator replaceFSwithLS(Operator<?> fileSink, String nullFormat) {
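
[Context note] SimpleFetchOptimizer now guards fetch-task conversion with an input-size threshold: checkThreshold() reads HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD, treats a negative value as "no limit", and otherwise estimates the bytes the query would scan (via an InputEstimator storage handler, a ContentSummaryInputFormat, or a plain FileSystem content summary). A small hedged sketch of setting the threshold programmatically; the 1 GB figure and the harness class are illustrative only:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class FetchThresholdExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Cap fetch-task (pseudo-MR) conversion at roughly 1 GB of scanned input;
        // a negative value disables the size check entirely.
        conf.setLongVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD,
            1024L * 1024L * 1024L);
        System.out.println("threshold = "
            + HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD));
      }
    }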

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Tue Nov 12 18:23:05 2013
@@ -4,6 +4,7 @@ import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.commons.logging.Log;
@@ -216,13 +217,15 @@ public class StatsOptimizer implements T
               String colName = desc.getColumn();
               StatType type = getType(desc.getTypeString());
               if(!tbl.isPartitioned()) {
-                rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
+                if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+                  Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+                  return null;
+                  }
+            	rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
                 if (rowCnt < 1) {
                   Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
                   return null;
                 }
-                //TODO: After HIVE-3777 use the property to figure out if following
-                // stats is fresh or not.
                 ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
                     tbl.getDbName(),tbl.getTableName(),colName).
                     getStatsObjIterator().next().getStatsData();
@@ -236,18 +239,20 @@ public class StatsOptimizer implements T
                 }
               } else {
                 for (Partition part : hive.getAllPartitionsOf(tbl)) {
-                  Long partRowCnt = Long.parseLong(part.getParameters()
+                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+                    return null;
+                    }
+                	Long partRowCnt = Long.parseLong(part.getParameters()
                     .get(StatsSetupConst.ROW_COUNT));
                   if (partRowCnt < 1) {
                     Log.debug("Partition doesn't have upto date stats " + part.getSpec());
                     return null;
                   }
                   rowCnt += partRowCnt;
-                  //TODO: After HIVE-3777 use the property to figure out if following
-                  // stats is fresh or not.
                   ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
-                      tbl.getDbName(), tbl.getTableName(),part.getName(), colName)
-                      .getStatsObjIterator().next().getStatsData();
+                    tbl.getDbName(), tbl.getTableName(),part.getName(), colName)
+                    .getStatsObjIterator().next().getStatsData();
                   Long nullCnt = getNullcountFor(type, statData);
                   if(nullCnt == null) {
                     Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
@@ -268,11 +273,13 @@ public class StatsOptimizer implements T
             String colName = colDesc.getColumn();
             StatType type = getType(colDesc.getTypeString());
             if(!tbl.isPartitioned()) {
-              //TODO: After HIVE-3777 use the property to figure out if following
-              // stats is fresh or not.
+              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+                  Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+                  return null;
+                  }
               ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
-                  tbl.getDbName(),tbl.getTableName(),colName).
-                  getStatsObjIterator().next().getStatsData();
+                tbl.getDbName(),tbl.getTableName(),colName).
+                getStatsObjIterator().next().getStatsData();
               switch (type) {
               case Integeral:
                 oneRow.add(statData.getLongStats().getHighValue());
@@ -291,16 +298,17 @@ public class StatsOptimizer implements T
                 return null;
               }
             } else {
-              List<String> parts = hive.getMSC().listPartitionNames(tbl.getDbName(),
-                  tbl.getTableName(), (short)-1);
+              Set<Partition> parts = hive.getAllPartitionsOf(tbl);
               switch(type) {
               case Integeral: {
                 long maxVal = Long.MIN_VALUE;
-                for (String part : parts) {
-                  //TODO: After HIVE-3777 use the property to figure out if following
-                  // stats is fresh or not.
-                  ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
-                      tbl.getDbName(),tbl.getTableName(), part, colName).
+                for (Partition part : parts) {
+                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+                    return null;
+                    }
+                	ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
+                      tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
                       getStatsObjIterator().next().getStatsData();
                   maxVal = Math.max(maxVal,statData.getLongStats().getHighValue());
                 }
@@ -311,12 +319,14 @@ public class StatsOptimizer implements T
               }
               case Double: {
                 double maxVal = Double.MIN_VALUE;
-                for (String part : parts) {
-                  //TODO: After HIVE-3777 use the property to figure out if following
-                  // stats is fresh or not.
+                for (Partition part : parts) {
+                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+                    return null;
+                    }
                   ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
-                      tbl.getDbName(),tbl.getTableName(), part, colName).
-                      getStatsObjIterator().next().getStatsData();
+                    tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
+                    getStatsObjIterator().next().getStatsData();
                   maxVal = Math.max(maxVal,statData.getDoubleStats().getHighValue());
                 }
                 oneRow.add(maxVal);
@@ -336,11 +346,13 @@ public class StatsOptimizer implements T
             String colName = colDesc.getColumn();
             StatType type = getType(colDesc.getTypeString());
             if (!tbl.isPartitioned()) {
-              //TODO: After HIVE-3777 use the property to figure out if following
-              // stats is fresh or not.
+              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+                return null;
+                }
               ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
-                  tbl.getDbName(),tbl.getTableName(),colName).
-                  getStatsObjIterator().next().getStatsData();
+                tbl.getDbName(),tbl.getTableName(),colName).
+                getStatsObjIterator().next().getStatsData();
               switch (type) {
               case Integeral:
                 oneRow.add(statData.getLongStats().getLowValue());
@@ -358,17 +370,18 @@ public class StatsOptimizer implements T
                 return null;
               }
             } else {
-              List<String> parts = hive.getMSC().listPartitionNames(tbl.getDbName(),
-                  tbl.getTableName(), (short)-1);
+              Set<Partition> parts = hive.getAllPartitionsOf(tbl);
               switch(type) {
               case Integeral: {
                 long minVal = Long.MAX_VALUE;
-                for (String part : parts) {
-                  //TODO: After HIVE-3777 use the property to figure out if following
-                  // stats is fresh or not.
+                for (Partition part : parts) {
+                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+                    return null;
+                    }
                   ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
-                      tbl.getDbName(),tbl.getTableName(), part, colName).
-                      getStatsObjIterator().next().getStatsData();
+                    tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
+                    getStatsObjIterator().next().getStatsData();
                   minVal = Math.min(minVal,statData.getLongStats().getLowValue());
                 }
                 oneRow.add(minVal);
@@ -378,12 +391,14 @@ public class StatsOptimizer implements T
               }
               case Double: {
                 double minVal = Double.MAX_VALUE;
-                for (String part : parts) {
-                  //TODO: After HIVE-3777 use the property to figure out if following
-                  // stats is fresh or not.
+                for (Partition part : parts) {
+                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+                    return null;
+                    }
                   ColumnStatisticsData statData = hive.getMSC().getPartitionColumnStatistics(
-                      tbl.getDbName(),tbl.getTableName(), part, colName).
-                      getStatsObjIterator().next().getStatsData();
+                    tbl.getDbName(),tbl.getTableName(), part.getName(), colName).
+                    getStatsObjIterator().next().getStatsData();
                   minVal = Math.min(minVal,statData.getDoubleStats().getLowValue());
                 }
                 oneRow.add(minVal);

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java Tue Nov 12 18:23:05 2013
@@ -397,24 +397,31 @@ public class CorrelationOptimizer implem
           }
         }
         if (current instanceof JoinOperator) {
-          LinkedHashSet<ReduceSinkOperator> correlatedRsOps =
-              new LinkedHashSet<ReduceSinkOperator>();
+          boolean isCorrelated = true;
+          int expectedNumCorrelatedRsops = current.getParentOperators().size();
+          LinkedHashSet<ReduceSinkOperator> correlatedRsops = null;
           for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) {
             Set<String> tableNames =
                 pCtx.getOpParseCtx().get(parent).getRowResolver().getTableNames();
             for (String tbl : tableNames) {
               if (tableNeedToCheck.contains(tbl)) {
-                correlatedRsOps.addAll(findCorrelatedReduceSinkOperators(
-                    current, backtrackedKeyCols, backtrackedPartitionCols, childRSOrder,
-                    parent, correlation));
+                correlatedRsops = findCorrelatedReduceSinkOperators(current,
+                    backtrackedKeyCols, backtrackedPartitionCols,
+                    childRSOrder, parent, correlation);
+                if (correlatedRsops.size() != expectedNumCorrelatedRsops) {
+                  isCorrelated = false;
+                }
               }
             }
+            if (!isCorrelated) {
+              break;
+            }
           }
           // If current is JoinOperaotr, we will stop to traverse the tree
           // when any of parent ReduceSinkOperaotr of this JoinOperator is
           // not considered as a correlated ReduceSinkOperator.
-          if (correlatedRsOps.size() == current.getParentOperators().size()) {
-            correlatedReduceSinkOperators.addAll(correlatedRsOps);
+          if (isCorrelated && correlatedRsops != null) {
+            correlatedReduceSinkOperators.addAll(correlatedRsops);
           } else {
             correlatedReduceSinkOperators.clear();
           }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Tue Nov 12 18:23:05 2013
@@ -84,6 +84,9 @@ public class PhysicalOptimizer {
     if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
       resolvers.add(new Vectorizer());
     }
+    if (!"none".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
+      resolvers.add(new StageIDsRearranger());
+    }
   }
 
   /**
@@ -98,5 +101,4 @@ public class PhysicalOptimizer {
     }
     return pctx;
   }
-
 }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Nov 12 18:23:05 2013
@@ -63,6 +63,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -174,6 +175,7 @@ public class Vectorizer implements Physi
     supportedGenericUDFs.add(GenericUDFConcat.class);
     supportedGenericUDFs.add(GenericUDFAbs.class);
     supportedGenericUDFs.add(GenericUDFBetween.class);
+    supportedGenericUDFs.add(GenericUDFIn.class);
 
     // For type casts
     supportedGenericUDFs.add(UDFToLong.class);
@@ -364,13 +366,15 @@ public class Vectorizer implements Physi
             Operator<? extends OperatorDesc> op = mWork.getAliasToWork().get(alias);
             if (op == tsOp) {
               fileKey = onefile;
+              if (vContext == null) {
+                vContext = getVectorizationContext(tsOp, physicalContext);
+              }
+              vContext.setFileKey(fileKey);
+              vectorizationContexts.put(fileKey, vContext);
               break;
             }
           }
         }
-        vContext = getVectorizationContext(tsOp, physicalContext);
-        vContext.setFileKey(fileKey);
-        vectorizationContexts.put(fileKey, vContext);
         vContextsByTSOp.put(tsOp, vContext);
       }
 

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java Tue Nov 12 18:23:05 2013
@@ -32,7 +32,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
 public class ASTNode extends CommonTree implements Node,Serializable {
   private static final long serialVersionUID = 1L;
 
-  private ASTNodeOrigin origin;
+  private transient ASTNodeOrigin origin;
 
   public ASTNode() {
   }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -20,6 +20,9 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
+import java.sql.Date;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -66,6 +69,7 @@ import org.apache.hadoop.hive.ql.plan.Li
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -73,6 +77,9 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * BaseSemanticAnalyzer.
@@ -80,6 +87,7 @@ import org.apache.hadoop.mapred.TextInpu
  */
 @SuppressWarnings("deprecation")
 public abstract class BaseSemanticAnalyzer {
+  private static final Log STATIC_LOG = LogFactory.getLog(BaseSemanticAnalyzer.class.getName());
   protected final Hive db;
   protected final HiveConf conf;
   protected List<Task<? extends Serializable>> rootTasks;
@@ -707,10 +715,8 @@ public abstract class BaseSemanticAnalyz
       this(db, conf, ast, true, false);
     }
 
-    public tableSpec(Hive db, HiveConf conf, ASTNode ast,
-        boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec)
-        throws SemanticException {
-
+    public tableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
+        boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
           || ast.getToken().getType() == HiveParser.TOK_TABLE_PARTITION
           || ast.getToken().getType() == HiveParser.TOK_TABTYPE
@@ -761,7 +767,7 @@ public abstract class BaseSemanticAnalyz
           partSpec.put(colName, val);
         }
 
-        // check if the columns specified in the partition() clause are actually partition columns
+        // check if the columns, as well as value types in the partition() clause are valid
         validatePartSpec(tableHandle, partSpec, ast, conf);
 
         // check if the partition spec is valid
@@ -840,7 +846,6 @@ public abstract class BaseSemanticAnalyz
         return tableHandle.toString();
       }
     }
-
   }
 
   /**
@@ -1156,52 +1161,86 @@ public abstract class BaseSemanticAnalyz
     }
   }
 
-  public static void validatePartSpec(Table tbl,
-      Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
-
+  public static void validatePartSpec(Table tbl, Map<String, String> partSpec,
+      ASTNode astNode, HiveConf conf) throws SemanticException {
     Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
 
     Utilities.validatePartSpec(tbl, partSpec);
 
-    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+    if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+      return;
+    }
+
+    try {
+      getPartExprNodeDesc(astNode, astExprNodeMap);
+    } catch (HiveException e) {
+      return;
+    }
+    List<FieldSchema> parts = tbl.getPartitionKeys();
+    Map<String, String> partCols = new HashMap<String, String>(parts.size());
+    for (FieldSchema col : parts) {
+      partCols.put(col.getName(), col.getType().toLowerCase());
+    }
+    for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
+      String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
+      if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
+        astKeyName = stripIdentifierQuotes(astKeyName);
+      }
+      String colType = partCols.get(astKeyName);
+      ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
+
+      TypeInfo expectedType =
+          TypeInfoUtils.getTypeInfoFromTypeString(colType);
+      ObjectInspector outputOI =
+          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+      Object value = null;
+      String colSpec = partSpec.get(astKeyName);
       try {
-        getPartExprNodeDesc(astNode, astExprNodeMap);
+        value =
+            ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
+            evaluate(colSpec);
       } catch (HiveException e) {
-        return;
+        throw new SemanticException(e);
       }
-      List<FieldSchema> parts = tbl.getPartitionKeys();
-      Map<String, String> partCols = new HashMap<String, String>(parts.size());
-      for (FieldSchema col : parts) {
-        partCols.put(col.getName(), col.getType().toLowerCase());
-      }
-      for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
-
-        String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
-        if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
-          astKeyName = stripIdentifierQuotes(astKeyName);
-        }
-        String colType = partCols.get(astKeyName);
-        ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
-
-        TypeInfo expectedType =
-            TypeInfoUtils.getTypeInfoFromTypeString(colType);
-        ObjectInspector outputOI =
-            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
-        Object value = null;
-        try {
-          value =
-              ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
-              evaluate(partSpec.get(astKeyName));
-        } catch (HiveException e) {
-          throw new SemanticException(e);
-        }
-        Object convertedValue =
-          ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
-        if (convertedValue == null) {
-          throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
-              inputOI.getTypeName(), outputOI.getTypeName()));
-        }
+      Object convertedValue =
+        ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
+      if (convertedValue == null) {
+        throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
+            inputOI.getTypeName(), outputOI.getTypeName()));
       }
+
+      normalizeColSpec(partSpec, astKeyName, colType, colSpec, convertedValue);
+    }
+  }
+
+  @VisibleForTesting
+  static void normalizeColSpec(Map<String, String> partSpec, String colName,
+      String colType, String originalColSpec, Object colValue) throws SemanticException {
+    if (colValue == null) return; // nothing to do with nulls
+    String normalizedColSpec = originalColSpec;
+    if (colType.equals(serdeConstants.DATE_TYPE_NAME)) {
+      normalizedColSpec = normalizeDateCol(colValue, originalColSpec);
+    }
+    if (!normalizedColSpec.equals(originalColSpec)) {
+      STATIC_LOG.warn("Normalizing partition spec - " + colName + " from "
+          + originalColSpec + " to " + normalizedColSpec);
+      partSpec.put(colName, normalizedColSpec);
+    }
+  }
+
+  /** A fixed date format to be used for hive partition column values. */
+  private static final DateFormat partitionDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+  private static String normalizeDateCol(
+      Object colValue, String originalColSpec) throws SemanticException {
+    Date value;
+    if (colValue instanceof DateWritable) {
+      value = ((DateWritable) colValue).get();
+    } else if (colValue instanceof Date) {
+      value = (Date) colValue;
+    } else {
+      throw new SemanticException("Unexpected date type " + colValue.getClass());
     }
+    return partitionDateFormat.format(value);
   }
 }
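
The normalizeColSpec path above only rewrites partSpec when the re-rendered value differs from what the user supplied, and date is the only column type normalized so far. Below is a minimal standalone sketch of the end effect, using only java.text; the class name and sample values are hypothetical, and the real code converts through ObjectInspectors rather than parsing strings.

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

// Standalone sketch: re-render a date partition value in the fixed yyyy-MM-dd
// form so partition specs stay canonical.
public class PartitionDateNormalizerSketch {
  private static final SimpleDateFormat PARTITION_DATE_FORMAT =
      new SimpleDateFormat("yyyy-MM-dd");

  static String normalize(String originalColSpec) throws ParseException {
    // SimpleDateFormat is lenient by default, so a spec like "2013-1-1" still parses.
    Date value = PARTITION_DATE_FORMAT.parse(originalColSpec);
    return PARTITION_DATE_FORMAT.format(value);
  }

  public static void main(String[] args) throws ParseException {
    System.out.println(normalize("2013-1-1"));   // 2013-01-01 (differs, so the real code warns and rewrites)
    System.out.println(normalize("2013-11-12")); // 2013-11-12 (unchanged, so partSpec is left alone)
  }
}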

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -127,6 +127,10 @@ import org.apache.hadoop.hive.ql.securit
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.mapred.InputFormat;
@@ -150,6 +154,7 @@ public class DDLSemanticAnalyzer extends
     TokenToTypeName.put(HiveParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME);
+    TokenToTypeName.put(HiveParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME);
@@ -168,6 +173,10 @@ public class DDLSemanticAnalyzer extends
     }
 
     switch (token) {
+    case HiveParser.TOK_CHAR:
+      CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(node);
+      typeName = charTypeInfo.getQualifiedName();
+      break;
     case HiveParser.TOK_VARCHAR:
       VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node);
       typeName = varcharTypeInfo.getQualifiedName();

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -19,7 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
-import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.hadoop.fs.Path;
@@ -60,21 +60,16 @@ public class ExplainSemanticAnalyzer ext
     ctx.setExplainLogical(logical);
 
     // Create a semantic analyzer for the query
-    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode) ast
-        .getChild(0));
-    sem.analyze((ASTNode) ast.getChild(0), ctx);
+    ASTNode input = (ASTNode) ast.getChild(0);
+    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, input);
+    sem.analyze(input, ctx);
     sem.validate();
 
     ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
     List<Task<? extends Serializable>> tasks = sem.getRootTasks();
     Task<? extends Serializable> fetchTask = sem.getFetchTask();
     if (tasks == null) {
-      if (fetchTask != null) {
-        tasks = new ArrayList<Task<? extends Serializable>>();
-        tasks.add(fetchTask);
-      }
-    } else if (fetchTask != null) {
-      tasks.add(fetchTask);
+      tasks = Collections.emptyList();
     }
 
     ParseContext pCtx = null;
@@ -82,17 +77,21 @@ public class ExplainSemanticAnalyzer ext
       pCtx = ((SemanticAnalyzer)sem).getParseContext();
     }
 
-    Task<? extends Serializable> explTask =
-        TaskFactory.get(new ExplainWork(ctx.getResFile().toString(),
+    ExplainWork work = new ExplainWork(ctx.getResFile().toString(),
         pCtx,
         tasks,
-        ((ASTNode) ast.getChild(0)).toStringTree(),
+        fetchTask,
+        input.toStringTree(),
         sem.getInputs(),
         extended,
         formatted,
         dependency,
-        logical),
-      conf);
+        logical);
+
+    work.setAppendTaskType(
+        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
+
+    Task<? extends Serializable> explTask = TaskFactory.get(work, conf);
 
     fieldList = explTask.getResultSchema();
     rootTasks.add(explTask);
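
The refactoring above moves the fetch task onto ExplainWork itself, instead of appending it to the root task list, and drives the new appendTaskType flag from configuration. A small hedged sketch of that flag lookup in isolation, assuming hive-common and hive-exec from this branch are on the classpath:

import org.apache.hadoop.hive.conf.HiveConf;

// Hedged sketch: the same configuration lookup ExplainSemanticAnalyzer now
// performs before constructing ExplainWork.
public class ExplainAppendTaskTypeSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    boolean appendTaskType =
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES);
    System.out.println("appendTaskType=" + appendTaskType);
  }
}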

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Tue Nov 12 18:23:05 2013
@@ -105,6 +105,7 @@ KW_DATETIME: 'DATETIME';
 KW_TIMESTAMP: 'TIMESTAMP';
 KW_DECIMAL: 'DECIMAL';
 KW_STRING: 'STRING';
+KW_CHAR: 'CHAR';
 KW_VARCHAR: 'VARCHAR';
 KW_ARRAY: 'ARRAY';
 KW_STRUCT: 'STRUCT';

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Tue Nov 12 18:23:05 2013
@@ -110,6 +110,7 @@ TOK_DATELITERAL;
 TOK_DATETIME;
 TOK_TIMESTAMP;
 TOK_STRING;
+TOK_CHAR;
 TOK_VARCHAR;
 TOK_BINARY;
 TOK_DECIMAL;
@@ -1781,6 +1782,7 @@ primitiveType
     | KW_BINARY        ->    TOK_BINARY
     | KW_DECIMAL (LPAREN prec=Number (COMMA scale=Number)? RPAREN)? -> ^(TOK_DECIMAL $prec? $scale?)
     | KW_VARCHAR LPAREN length=Number RPAREN      ->    ^(TOK_VARCHAR $length)
+    | KW_CHAR LPAREN length=Number RPAREN      ->    ^(TOK_CHAR $length)
     ;
 
 listType

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java Tue Nov 12 18:23:05 2013
@@ -26,6 +26,9 @@ import org.apache.hadoop.hive.common.typ
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -131,6 +134,16 @@ public final class ParseUtils {
     return TypeInfoFactory.getVarcharTypeInfo(Integer.valueOf(lengthStr));
   }
 
+  public static CharTypeInfo getCharTypeInfo(ASTNode node)
+      throws SemanticException {
+    if (node.getChildCount() != 1) {
+      throw new SemanticException("Bad params for type char");
+    }
+
+    String lengthStr = node.getChild(0).getText();
+    return TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr));
+  }
+
   static int getIndex(String[] list, String elem) {
     for(int i=0; i < list.length; i++) {
       if (list[i].toLowerCase().equals(elem)) {
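
getCharTypeInfo mirrors the existing varchar handling: one child node carrying the length, turned into a parameterized type. A hedged sketch of the result, assuming hive-serde from this branch on the classpath; the length "10" stands in for node.getChild(0).getText():

import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Hedged sketch: build a parameterized char type from a parsed length literal.
public class CharTypeInfoSketch {
  public static void main(String[] args) {
    String lengthStr = "10"; // stands in for the TOK_CHAR node's single child
    CharTypeInfo charTypeInfo = TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr));
    System.out.println(charTypeInfo.getQualifiedName()); // char(10)
  }
}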

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Nov 12 18:23:05 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -166,6 +167,8 @@ import org.apache.hadoop.hive.serde2.laz
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -806,6 +809,7 @@ public class SemanticAnalyzer extends Ba
         String currentDatabase = SessionState.get().getCurrentDatabase();
         String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
         qbp.addInsertIntoTable(tab_name);
+        // TODO: is this supposed to fall thru?
 
       case HiveParser.TOK_DESTINATION:
         ctx_1.dest = "insclause-" + ctx_1.nextNum;
@@ -967,17 +971,17 @@ public class SemanticAnalyzer extends Ba
           throw new SemanticException(generateErrorMessage(ast,
               ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
         }
+        skipRecursion = false;
+        break;
 
       case HiveParser.TOK_INSERT:
         ASTNode destination = (ASTNode) ast.getChild(0);
         Tree tab = destination.getChild(0);
-
         // Proceed if AST contains partition & If Not Exists
         if (destination.getChildCount() == 2 &&
             tab.getChildCount() == 2 &&
             destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
           String tableName = tab.getChild(0).getChild(0).getText();
-
           Tree partitions = tab.getChild(1);
           int childCount = partitions.getChildCount();
           HashMap<String, String> partition = new HashMap<String, String>();
@@ -991,25 +995,30 @@ public class SemanticAnalyzer extends Ba
             partition.put(partitionName, partitionVal);
           }
           // if it is a dynamic partition throw the exception
-          if (childCount == partition.size()) {
-            try {
-              Table table = db.getTable(tableName);
-              Partition parMetaData = db.getPartition(table, partition, false);
-              // Check partition exists if it exists skip the overwrite
-              if (parMetaData != null) {
-                phase1Result = false;
-                skipRecursion = true;
-                LOG.info("Partition already exists so insert into overwrite " +
-                    "skipped for partition : " + parMetaData.toString());
-                break;
-              }
-            } catch (HiveException e) {
-              LOG.info("Error while getting metadata : ", e);
-            }
-          } else {
+          if (childCount != partition.size()) {
             throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
                 .getMsg(partition.toString()));
           }
+          Table table = null;
+          try {
+            table = db.getTable(tableName);
+          } catch (HiveException ex) {
+            throw new SemanticException(ex);
+          }
+          try {
+            Partition parMetaData = db.getPartition(table, partition, false);
+            // Check partition exists if it exists skip the overwrite
+            if (parMetaData != null) {
+              phase1Result = false;
+              skipRecursion = true;
+              LOG.info("Partition already exists so insert into overwrite " +
+                  "skipped for partition : " + parMetaData.toString());
+              break;
+            }
+          } catch (HiveException e) {
+            LOG.info("Error while getting metadata : ", e);
+          }
+          validatePartSpec(table, partition, (ASTNode)tab, conf);
         }
         skipRecursion = false;
         break;
@@ -5705,7 +5714,9 @@ public class SemanticAnalyzer extends Ba
       colNames.add(inputCols.get(i).getInternalName());
       colOIs[i] = inputCols.get(i).getObjectInspector();
     }
-    StructObjectInspector outputOI = genericUDTF.initialize(colOIs);
+    StandardStructObjectInspector rowOI =
+        ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs));
+    StructObjectInspector outputOI = genericUDTF.initialize(rowOI);
 
     int numUdtfCols = outputOI.getAllStructFieldRefs().size();
     if (colAliases.isEmpty()) {
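
The change above hands GenericUDTF.initialize a struct-shaped view of the input row rather than a bare ObjectInspector[]. A hedged sketch of building such a row inspector, assuming hive-serde on the classpath; the column names and inspectors are hypothetical samples:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hedged sketch: wrap per-column inspectors into one struct inspector, the
// shape now passed to GenericUDTF.initialize.
public class UdtfRowInspectorSketch {
  public static void main(String[] args) {
    List<String> colNames = Arrays.asList("_col0", "_col1");
    List<ObjectInspector> colOIs = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    StandardStructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs);
    System.out.println(rowOI.getTypeName()); // struct<_col0:string,_col1:int>
  }
}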

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Tue Nov 12 18:23:05 2013
@@ -32,6 +32,7 @@ import java.util.Stack;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
@@ -62,6 +63,8 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
@@ -270,8 +273,16 @@ public final class TypeCheckProcFactory 
                 0, expr.getText().length() - 1));
         } else if (expr.getText().endsWith("BD")) {
           // Literal decimal
-          return new ExprNodeConstantDesc(TypeInfoFactory.decimalTypeInfo,
-                expr.getText().substring(0, expr.getText().length() - 2));
+          String strVal = expr.getText().substring(0, expr.getText().length() - 2);
+          HiveDecimal hd = HiveDecimal.create(strVal);
+          int prec = 1;
+          int scale = 0;
+          if (hd != null) {
+            prec = hd.precision();
+            scale = hd.scale();
+          }
+          DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale);
+          return new ExprNodeConstantDesc(typeInfo, strVal);
         } else {
           v = Double.valueOf(expr.getText());
           v = Long.valueOf(expr.getText());
@@ -573,6 +584,8 @@ public final class TypeCheckProcFactory 
           serdeConstants.DOUBLE_TYPE_NAME);
       conversionFunctionTextHashMap.put(HiveParser.TOK_STRING,
           serdeConstants.STRING_TYPE_NAME);
+      conversionFunctionTextHashMap.put(HiveParser.TOK_CHAR,
+          serdeConstants.CHAR_TYPE_NAME);
       conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR,
           serdeConstants.VARCHAR_TYPE_NAME);
       conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY,
@@ -796,6 +809,13 @@ public final class TypeCheckProcFactory 
         if (isFunction) {
           ASTNode funcNameNode = (ASTNode)expr.getChild(0);
           switch (funcNameNode.getType()) {
+            case HiveParser.TOK_CHAR:
+              // Add type params
+              CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(funcNameNode);
+              if (genericUDF != null) {
+                ((SettableUDF)genericUDF).setTypeInfo(charTypeInfo);
+              }
+              break;
             case HiveParser.TOK_VARCHAR:
               VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(funcNameNode);
               if (genericUDF != null) {
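
Earlier in this file's diff, a decimal literal written with the BD suffix is now typed with the precision and scale of the literal itself instead of a generic decimal type. A hedged sketch of that sizing, using java.math.BigDecimal in place of HiveDecimal (the two agree on precision and scale for a plain literal like the sample below):

import java.math.BigDecimal;

// Hedged sketch: derive precision and scale from a "...BD" literal, so the
// constant can be typed as decimal(precision, scale).
public class DecimalLiteralTypeSketch {
  public static void main(String[] args) {
    String literal = "1.23BD";
    String strVal = literal.substring(0, literal.length() - 2); // strip the BD suffix
    BigDecimal val = new BigDecimal(strVal);
    System.out.println("decimal(" + val.precision() + "," + val.scale() + ")"); // decimal(3,2)
  }
}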

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java Tue Nov 12 18:23:05 2013
@@ -36,6 +36,7 @@ public class ExplainWork implements Seri
 
   private String resFile;
   private ArrayList<Task<? extends Serializable>> rootTasks;
+  private Task<? extends Serializable> fetchTask;
   private String astStringTree;
   private HashSet<ReadEntity> inputs;
   private ParseContext pCtx;
@@ -45,6 +46,8 @@ public class ExplainWork implements Seri
   boolean dependency;
   boolean logical;
 
+  boolean appendTaskType;
+
 
   public ExplainWork() {
   }
@@ -52,6 +55,7 @@ public class ExplainWork implements Seri
   public ExplainWork(String resFile,
       ParseContext pCtx,
       List<Task<? extends Serializable>> rootTasks,
+      Task<? extends Serializable> fetchTask,
       String astStringTree,
       HashSet<ReadEntity> inputs,
       boolean extended,
@@ -60,6 +64,7 @@ public class ExplainWork implements Seri
       boolean logical) {
     this.resFile = resFile;
     this.rootTasks = new ArrayList<Task<? extends Serializable>>(rootTasks);
+    this.fetchTask = fetchTask;
     this.astStringTree = astStringTree;
     this.inputs = inputs;
     this.extended = extended;
@@ -85,6 +90,14 @@ public class ExplainWork implements Seri
     this.rootTasks = rootTasks;
   }
 
+  public Task<? extends Serializable> getFetchTask() {
+    return fetchTask;
+  }
+
+  public void setFetchTask(Task<? extends Serializable> fetchTask) {
+    this.fetchTask = fetchTask;
+  }
+
   public String getAstStringTree() {
     return astStringTree;
   }
@@ -141,4 +154,11 @@ public class ExplainWork implements Seri
     this.logical = logical;
   }
 
+  public boolean isAppendTaskType() {
+    return appendTaskType;
+  }
+
+  public void setAppendTaskType(boolean appendTaskType) {
+    this.appendTaskType = appendTaskType;
+  }
 }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java Tue Nov 12 18:23:05 2013
@@ -62,7 +62,7 @@ public class UDFOPDivide extends UDF {
   public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) {
     // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":"
     // + b);
-    if ((a == null) || (b == null)) {
+    if (a == null || b == null || b.get() == 0.0) {
       return null;
     }
 

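With the change above, divide returns a SQL NULL for a zero divisor, in addition to the existing null-operand cases, rather than producing Infinity or NaN. A standalone sketch of the resulting semantics; the method and sample values are illustrative only:

// Standalone sketch: any null operand, or a zero divisor, yields null.
public class DivideSemanticsSketch {
  static Double divide(Double a, Double b) {
    if (a == null || b == null || b == 0.0) {
      return null;
    }
    return a / b;
  }

  public static void main(String[] args) {
    System.out.println(divide(6.0, 3.0));  // 2.0
    System.out.println(divide(1.0, 0.0));  // null
    System.out.println(divide(null, 2.0)); // null
  }
}
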
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Tue Nov 12 18:23:05 2013
@@ -83,6 +83,7 @@ public class GenericUDAFComputeStats ext
     case DOUBLE:
       return new GenericUDAFDoubleStatsEvaluator();
     case STRING:
+    case CHAR:
     case VARCHAR:
       return new GenericUDAFStringStatsEvaluator();
     case BINARY:

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java Tue Nov 12 18:23:05 2013
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -33,8 +34,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.BytesWritable;
 
 /**
@@ -62,7 +63,9 @@ public class GenericUDFConcat extends Ge
 
     // Loop through all the inputs to determine the appropriate return type/length.
     // Return type:
+    //  All CHAR inputs: return CHAR
     //  All VARCHAR inputs: return VARCHAR
+    //  All CHAR/VARCHAR inputs: return VARCHAR
     //  All BINARY inputs: return BINARY
     //  Otherwise return STRING
     argumentOIs = arguments;
@@ -88,10 +91,14 @@ public class GenericUDFConcat extends Ge
             returnType = PrimitiveCategory.STRING;
           }
           break;
+        case CHAR:
         case VARCHAR:
           if (!fixedLengthReturnValue) {
             returnType = PrimitiveCategory.STRING;
           }
+          if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
+            returnType = PrimitiveCategory.VARCHAR;
+          }
           break;
         default:
           returnType = PrimitiveCategory.STRING;
@@ -104,8 +111,10 @@ public class GenericUDFConcat extends Ge
       // max length for the char/varchar, then the return type reverts to string.
       if (fixedLengthReturnValue) {
         returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
-        if (returnType == PrimitiveCategory.VARCHAR
-            && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) {
+        if ((returnType == PrimitiveCategory.VARCHAR
+                && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH)
+            || (returnType == PrimitiveCategory.CHAR
+                && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
           returnType = PrimitiveCategory.STRING;
           fixedLengthReturnValue = false;
         }
@@ -119,11 +128,15 @@ public class GenericUDFConcat extends Ge
       // treat all inputs as string, the return value will be converted to the appropriate type.
       createStringConverters();
       returnHelper = new GenericUDFUtils.StringHelper(returnType);
+      BaseCharTypeInfo typeInfo;
       switch (returnType) {
         case STRING:
           return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+        case CHAR:
+          typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
+          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
         case VARCHAR:
-          VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
+          typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
           return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
         default:
           throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
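
The CONCAT return-type rules listed in the comments above reduce to: all char stays char, any varchar among char/varchar inputs widens to varchar, and anything else (or exceeding the type's maximum length) falls back to string; the all-binary case is omitted here. A hedged standalone sketch of that decision, using 255 and 65535 as the char/varchar maximum lengths (the HiveChar/HiveVarchar constants referenced above):

// Hedged sketch of the return-type decision, reduced to plain enums.
public class ConcatReturnTypeSketch {
  enum Cat { CHAR, VARCHAR, STRING }

  static Cat concatReturnType(int totalLength, Cat... inputs) {
    Cat returnType = Cat.CHAR; // assume the narrowest case until an input says otherwise
    for (Cat c : inputs) {
      if (c == Cat.VARCHAR && returnType == Cat.CHAR) {
        returnType = Cat.VARCHAR; // mixed char/varchar widens to varchar
      } else if (c != Cat.CHAR && c != Cat.VARCHAR) {
        return Cat.STRING; // any other input type makes the result string
      }
    }
    // Overflowing the type's maximum length also falls back to string.
    if ((returnType == Cat.CHAR && totalLength > 255)
        || (returnType == Cat.VARCHAR && totalLength > 65535)) {
      return Cat.STRING;
    }
    return returnType;
  }

  public static void main(String[] args) {
    System.out.println(concatReturnType(20, Cat.CHAR, Cat.CHAR));          // CHAR
    System.out.println(concatReturnType(20, Cat.CHAR, Cat.VARCHAR));       // VARCHAR
    System.out.println(concatReturnType(20, Cat.CHAR, Cat.STRING));        // STRING
    System.out.println(concatReturnType(70000, Cat.VARCHAR, Cat.VARCHAR)); // STRING
  }
}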

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java Tue Nov 12 18:23:05 2013
@@ -31,8 +31,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 
 /**
  * UDFLower.
@@ -65,11 +65,19 @@ public class GenericUDFLower extends Gen
     stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
     PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
     ObjectInspector outputOI = null;
+    BaseCharTypeInfo typeInfo;
     switch (inputType) {
+      case CHAR:
+        // return type should have same length as the input.
+        returnType = inputType;
+        typeInfo = TypeInfoFactory.getCharTypeInfo(
+            GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
+        outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+        break;
       case VARCHAR:
         // return type should have same length as the input.
         returnType = inputType;
-        VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(
+        typeInfo = TypeInfoFactory.getVarcharTypeInfo(
             GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
         outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
         break;

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java Tue Nov 12 18:23:05 2013
@@ -97,7 +97,7 @@ public class GenericUDFOPEqualOrGreaterT
       Text t0, t1;
       t0 = soi0.getPrimitiveWritableObject(o0);
       t1 = soi1.getPrimitiveWritableObject(o1);
-      result.set(ShimLoader.getHadoopShims().compareText(t0, t1) >= 0);
+      result.set(t0.compareTo(t1) >= 0);
       break;
     case COMPARE_INT:
       result.set(ioi0.get(o0) >= ioi1.get(o1));

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java Tue Nov 12 18:23:05 2013
@@ -97,7 +97,7 @@ public class GenericUDFOPEqualOrLessThan
       Text t0, t1;
       t0 = soi0.getPrimitiveWritableObject(o0);
       t1 = soi1.getPrimitiveWritableObject(o1);
-      result.set(ShimLoader.getHadoopShims().compareText(t0, t1) <= 0);
+      result.set(t0.compareTo(t1) <= 0);
       break;
     case COMPARE_INT:
       result.set(ioi0.get(o0) <= ioi1.get(o1));

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java Tue Nov 12 18:23:05 2013
@@ -97,7 +97,7 @@ public class GenericUDFOPGreaterThan ext
       Text t0, t1;
       t0 = soi0.getPrimitiveWritableObject(o0);
       t1 = soi1.getPrimitiveWritableObject(o1);
-      result.set(ShimLoader.getHadoopShims().compareText(t0, t1) > 0);
+      result.set(t0.compareTo(t1) > 0);
       break;
     case COMPARE_INT:
       result.set(ioi0.get(o0) > ioi1.get(o1));
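
The same one-line change appears in each of the comparison UDFs in this commit: Text values are compared directly instead of through ShimLoader.getHadoopShims().compareText. A hedged sketch of the direct comparison, assuming hadoop-common on the classpath:

import org.apache.hadoop.io.Text;

// Hedged sketch: Text is itself comparable (byte-wise), so no shim is needed.
public class TextCompareSketch {
  public static void main(String[] args) {
    Text t0 = new Text("apple");
    Text t1 = new Text("banana");
    System.out.println(t0.compareTo(t1) > 0);                 // false: "apple" sorts before "banana"
    System.out.println(t0.compareTo(t1) <= 0);                // true
    System.out.println(t0.compareTo(new Text("apple")) == 0); // true: equal byte sequences
  }
}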

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java?rev=1541190&r1=1541189&r2=1541190&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java Tue Nov 12 18:23:05 2013
@@ -68,7 +68,7 @@ public class GenericUDFOPLessThan extend
       Text t0, t1;
       t0 = soi0.getPrimitiveWritableObject(o0);
       t1 = soi1.getPrimitiveWritableObject(o1);
-      result.set(ShimLoader.getHadoopShims().compareText(t0, t1) < 0);
+      result.set(t0.compareTo(t1) < 0);
       break;
     case COMPARE_INT:
       result.set(ioi0.get(o0) < ioi1.get(o1));