Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/23 22:41:01 UTC

svn commit: r1525692 [2/8] - in /hive/branches/vectorization: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ contrib/src/test/results/clientpositive/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/ hbase-handler/src/test/results/positive/ h...

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Mon Sep 23 20:40:54 2013
@@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.udf.Gen
 import org.apache.hadoop.hive.ql.udf.GenericUDFEncode;
 import org.apache.hadoop.hive.ql.udf.SettableUDF;
 import org.apache.hadoop.hive.ql.udf.UDAFPercentile;
-import org.apache.hadoop.hive.ql.udf.UDFAbs;
 import org.apache.hadoop.hive.ql.udf.UDFAcos;
 import org.apache.hadoop.hive.ql.udf.UDFAscii;
 import org.apache.hadoop.hive.ql.udf.UDFAsin;
@@ -211,7 +210,7 @@ public final class FunctionRegistry {
     registerUDF("ceil", UDFCeil.class, false);
     registerUDF("ceiling", UDFCeil.class, false);
     registerUDF("rand", UDFRand.class, false);
-    registerUDF("abs", UDFAbs.class, false);
+    registerGenericUDF("abs", GenericUDFAbs.class);
     registerUDF("pmod", UDFPosMod.class, false);
 
     registerUDF("ln", UDFLn.class, false);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Mon Sep 23 20:40:54 2013
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -96,9 +97,10 @@ public class ReduceSinkOperator extends 
   protected void initializeOp(Configuration hconf) throws HiveException {
 
     try {
-      keyEval = new ExprNodeEvaluator[conf.getKeyCols().size()];
+      List<ExprNodeDesc> keys = conf.getKeyCols();
+      keyEval = new ExprNodeEvaluator[keys.size()];
       int i = 0;
-      for (ExprNodeDesc e : conf.getKeyCols()) {
+      for (ExprNodeDesc e : keys) {
         keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
       }
 
@@ -115,7 +117,8 @@ public class ReduceSinkOperator extends 
       partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
       i = 0;
       for (ExprNodeDesc e : conf.getPartitionCols()) {
-        partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
+        int index = ExprNodeDescUtils.indexOf(e, keys);
+        partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e): keyEval[index];
       }
 
       tag = conf.getTag();
@@ -277,27 +280,27 @@ public class ReduceSinkOperator extends 
       }
 
       // Evaluate the keys
-      Object[] distributionKeys = new Object[numDistributionKeys];
       for (int i = 0; i < numDistributionKeys; i++) {
-        distributionKeys[i] = keyEval[i].evaluate(row);
+        cachedKeys[0][i] = keyEval[i].evaluate(row);
       }
-
       if (numDistinctExprs > 0) {
         // with distinct key(s)
         for (int i = 0; i < numDistinctExprs; i++) {
-          System.arraycopy(distributionKeys, 0, cachedKeys[i], 0, numDistributionKeys);
-          Object[] distinctParameters =
-            new Object[distinctColIndices.get(i).size()];
+          if (i > 0) {
+            System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
+          }
+          StandardUnion union = (StandardUnion) cachedKeys[i][numDistributionKeys];
+          if (union == null) {
+            cachedKeys[i][numDistributionKeys] =
+              union = new StandardUnion((byte)i, new Object[distinctColIndices.get(i).size()]);
+          }
+          Object[] distinctParameters = (Object[]) union.getObject();
           for (int j = 0; j < distinctParameters.length; j++) {
             distinctParameters[j] =
               keyEval[distinctColIndices.get(i).get(j)].evaluate(row);
           }
-          cachedKeys[i][numDistributionKeys] =
-              new StandardUnion((byte)i, distinctParameters);
+          union.setTag((byte) i);
         }
-      } else {
-        // no distinct key
-        System.arraycopy(distributionKeys, 0, cachedKeys[0], 0, numDistributionKeys);
       }
 
       BytesWritable value = null;
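
Two per-row allocations disappear in the hunks above: partition-column evaluators are shared with the key evaluators whenever ExprNodeDescUtils.indexOf finds the same expression among the keys, and the temporary Object[] of distribution keys plus the per-row StandardUnion wrappers are replaced by buffers that live in cachedKeys and are simply overwritten (and re-tagged) for each row. The plain-Java sketch below, with invented names and no Hive types, isolates that allocate-once/overwrite-per-row idea; the real operator additionally copies cachedKeys[0] into cachedKeys[i] when there is more than one distinct expression.

    // Buffers are created once per operator and reused for every row.
    final class ReusedKeyBuffer {
      private final Object[] keys;          // plays the role of cachedKeys[0]
      private final TaggedValues distinct;  // plays the role of the cached StandardUnion

      ReusedKeyBuffer(int numKeys, int numDistinctParams) {
        this.keys = new Object[numKeys + 1];
        this.distinct = new TaggedValues(new Object[numDistinctParams]);
        this.keys[numKeys] = distinct;
      }

      Object[] fill(Object[] row, byte tag) {
        for (int i = 0; i < keys.length - 1; i++) {
          keys[i] = row[i];   // overwrite in place, no new Object[] per row
        }
        distinct.tag = tag;   // re-tag instead of allocating a new wrapper
        return keys;
      }

      static final class TaggedValues {
        byte tag;
        final Object[] values;  // reused distinct parameters, refilled per row
        TaggedValues(Object[] values) { this.values = values; }
      }
    }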

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Mon Sep 23 20:40:54 2013
@@ -33,6 +33,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
@@ -125,8 +126,8 @@ public class SMBMapJoinOperator extends 
 
     int bucketSize;
 
-    // For backwards compatibility reasons we honor the older 
-    // HIVEMAPJOINBUCKETCACHESIZE if set different from default. 
+    // For backwards compatibility reasons we honor the older
+    // HIVEMAPJOINBUCKETCACHESIZE if set different from default.
     // By hive 0.13 we should remove this code.
     int oldVar = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
     if (oldVar != 100) {
@@ -186,36 +187,31 @@ public class SMBMapJoinOperator extends 
       String alias = entry.getKey();
       FetchWork fetchWork = entry.getValue();
 
-      Operator<? extends OperatorDesc> forwardOp = aliasToWork.get(alias);
-      forwardOp.setExecContext(getExecContext());
+      JobConf jobClone = new JobConf(hconf);
+
+      TableScanOperator ts = (TableScanOperator)aliasToWork.get(alias);
+      // push down projections
+      ColumnProjectionUtils.appendReadColumns(
+          jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
+      // push down filters
+      HiveInputFormat.pushFilters(jobClone, ts);
+
+
+      ts.setExecContext(getExecContext());
 
-      JobConf jobClone = cloneJobConf(hconf, forwardOp);
       FetchOperator fetchOp = new FetchOperator(fetchWork, jobClone);
-      forwardOp.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()});
+      ts.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()});
       fetchOp.clearFetchContext();
 
       DummyStoreOperator sinkOp = aliasToSinkWork.get(alias);
 
-      MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone, forwardOp, sinkOp);
+      MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone, ts, sinkOp);
 
       aliasToMergeQueue.put(alias, mergeQueue);
       l4j.info("fetch operators for " + alias + " initialized");
     }
   }
 
-  private JobConf cloneJobConf(Configuration hconf, Operator<?> op) {
-    JobConf jobClone = new JobConf(hconf);
-    if (op instanceof TableScanOperator) {
-      List<Integer> list = ((TableScanOperator)op).getNeededColumnIDs();
-      if (list != null) {
-        ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
-      }
-    } else {
-      ColumnProjectionUtils.setFullyReadColumns(jobClone);
-    }
-    return jobClone;
-  }
-
   private byte tagForAlias(String alias) {
     for (byte tag = 0; tag < tagToAlias.length; tag++) {
       if (alias.equals(tagToAlias[tag])) {
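
The old cloneJobConf helper is inlined and extended above: for each small-table alias the operator clones the JobConf, then pushes the table scan's needed columns and its filter expression into the clone before the FetchOperator is built, so the underlying reader only materializes what the scan requires. A minimal sketch of that per-scan setup, reusing the same two calls as the hunk (the helper class and method names are invented):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.ql.exec.TableScanOperator;
    import org.apache.hadoop.hive.ql.io.HiveInputFormat;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
    import org.apache.hadoop.mapred.JobConf;

    final class PushdownConf {
      // Clone the job conf and narrow it to what one table scan needs.
      static JobConf forTableScan(Configuration hconf, TableScanOperator ts) {
        JobConf jobClone = new JobConf(hconf);
        // push down projections: only the columns the scan actually reads
        ColumnProjectionUtils.appendReadColumns(
            jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
        // push down any filter expression attached to the scan
        HiveInputFormat.pushFilters(jobClone, ts);
        return jobClone;
      }
    }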

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Mon Sep 23 20:40:54 2013
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
-import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -31,6 +30,7 @@ import org.apache.hadoop.hive.common.Fil
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -237,18 +237,22 @@ public class TableScanOperator extends O
     return "TS";
   }
 
-  // this 'neededColumnIDs' field is included in this operator class instead of
+  // This 'neededColumnIDs' field is included in this operator class instead of
   // its desc class.The reason is that 1)tableScanDesc can not be instantiated,
   // and 2) it will fail some join and union queries if this is added forcibly
-  // into tableScanDesc
-  java.util.ArrayList<Integer> neededColumnIDs;
+  // into tableScanDesc.
+  // Both neededColumnIDs and neededColumns should never be null.
+  // When neededColumnIDs is an empty list,
+  // it means no needed column (e.g. we do not need any column to evaluate
+  // SELECT count(*) FROM t).
+  List<Integer> neededColumnIDs;
   List<String> neededColumns;
 
-  public void setNeededColumnIDs(java.util.ArrayList<Integer> orign_columns) {
+  public void setNeededColumnIDs(List<Integer> orign_columns) {
     neededColumnIDs = orign_columns;
   }
 
-  public java.util.ArrayList<Integer> getNeededColumnIDs() {
+  public List<Integer> getNeededColumnIDs() {
     return neededColumnIDs;
   }
 
@@ -324,4 +328,14 @@ public class TableScanOperator extends O
   public boolean supportAutomaticSortMergeJoin() {
     return true;
   }
+
+  @Override
+  public Operator<? extends OperatorDesc> clone()
+    throws CloneNotSupportedException {
+    TableScanOperator ts = (TableScanOperator) super.clone();
+    ts.setNeededColumnIDs(new ArrayList<Integer>(getNeededColumnIDs()));
+    ts.setNeededColumns(new ArrayList<String>(getNeededColumns()));
+    return ts;
+  }
+
 }
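
Per the new comment, neededColumnIDs and neededColumns are now guaranteed non-null, and an empty list means the scan needs no columns at all (for example SELECT count(*) FROM t). The added clone() gives the copy its own lists, so pruning one operator can never reach through a shared reference into another. The plain-Java snippet below is not Hive code; it only illustrates the aliasing pitfall that the deep copy avoids.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class SharedListPitfall {
      public static void main(String[] args) {
        List<Integer> original = new ArrayList<Integer>(Arrays.asList(0, 1, 2));

        List<Integer> shared = original;                         // aliased reference
        List<Integer> copied = new ArrayList<Integer>(original); // what clone() does

        shared.remove(Integer.valueOf(2));
        System.out.println(original); // [0, 1]    -- mutated through the alias
        System.out.println(copied);   // [0, 1, 2] -- independent copy unaffected
      }
    }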

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java Mon Sep 23 20:40:54 2013
@@ -25,7 +25,6 @@ import java.io.Serializable;
 import java.lang.management.ManagementFactory;
 import java.lang.management.MemoryMXBean;
 import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Collection;
 import java.util.HashMap;
@@ -55,6 +54,7 @@ import org.apache.hadoop.hive.ql.exec.Ut
 import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
 import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionException;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
@@ -376,20 +376,12 @@ public class MapredLocalTask extends Tas
     for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
       JobConf jobClone = new JobConf(job);
 
-      Operator<? extends OperatorDesc> tableScan =
-        work.getAliasToWork().get(entry.getKey());
-      boolean setColumnsNeeded = false;
-      if (tableScan instanceof TableScanOperator) {
-        ArrayList<Integer> list = ((TableScanOperator) tableScan).getNeededColumnIDs();
-        if (list != null) {
-          ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
-          setColumnsNeeded = true;
-        }
-      }
-
-      if (!setColumnsNeeded) {
-        ColumnProjectionUtils.setFullyReadColumns(jobClone);
-      }
+      TableScanOperator ts = (TableScanOperator)work.getAliasToWork().get(entry.getKey());
+      // push down projections
+      ColumnProjectionUtils.appendReadColumns(
+          jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
+      // push down filters
+      HiveInputFormat.pushFilters(jobClone, ts);
 
       // create a fetch operator
       FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Mon Sep 23 20:40:54 2013
@@ -66,16 +66,14 @@ public class BucketizedHiveInputFormat<K
       throw new IOException("cannot find class " + inputFormatClassName);
     }
 
-    // clone a jobConf for setting needed columns for reading
-    JobConf cloneJobConf = new JobConf(job);
-    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
+    pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath()
         .toString(), hsplit.getPath().toUri().getPath());
 
-    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
-        cloneJobConf);
-    BucketizedHiveRecordReader<K, V> rr= new BucketizedHiveRecordReader(inputFormat, hsplit, cloneJobConf,
+    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
+
+    BucketizedHiveRecordReader<K, V> rr= new BucketizedHiveRecordReader(inputFormat, hsplit, job,
         reporter);
-    rr.initIOContext(hsplit, cloneJobConf, inputFormatClass);
+    rr.initIOContext(hsplit, job, inputFormatClass);
     return rr;
   }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Mon Sep 23 20:40:54 2013
@@ -217,9 +217,6 @@ public class HiveInputFormat<K extends W
       throw new IOException("cannot find class " + inputFormatClassName, e);
     }
 
-    // clone a jobConf for setting needed columns for reading
-    JobConf cloneJobConf = new JobConf(job);
-
     if (this.mrwork == null) {
       init(job);
     }
@@ -227,22 +224,20 @@ public class HiveInputFormat<K extends W
     boolean nonNative = false;
     PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString());
     if ((part != null) && (part.getTableDesc() != null)) {
-      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
+      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), job);
       nonNative = part.getTableDesc().isNonNative();
     }
 
-    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
+    pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath()
       .toString(), hsplit.getPath().toUri().getPath(), nonNative);
 
-    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
-        cloneJobConf);
+    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
     RecordReader innerReader = null;
     try {
-      innerReader = inputFormat.getRecordReader(inputSplit,
-        cloneJobConf, reporter);
+      innerReader = inputFormat.getRecordReader(inputSplit, job, reporter);
     } catch (Exception e) {
       innerReader = HiveIOExceptionHandlerUtil
-          .handleRecordReaderCreationException(e, cloneJobConf);
+          .handleRecordReaderCreationException(e, job);
     }
     HiveRecordReader<K,V> rr = new HiveRecordReader(innerReader, job);
     rr.initIOContext(hsplit, job, inputFormatClass, innerReader);
@@ -420,20 +415,13 @@ public class HiveInputFormat<K extends W
     for (String alias : aliases) {
       Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(
         alias);
-      if (op != null && op instanceof TableScanOperator) {
-        TableScanOperator tableScan = (TableScanOperator) op;
-
-        // push down projections
-        ArrayList<Integer> list = tableScan.getNeededColumnIDs();
-        if (list != null) {
-          ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
-        } else {
-          ColumnProjectionUtils.setFullyReadColumns(jobConf);
-        }
-        ColumnProjectionUtils.appendReadColumnNames(jobConf,
-            tableScan.getNeededColumns());
-
-        pushFilters(jobConf, tableScan);
+      if (op instanceof TableScanOperator) {
+        TableScanOperator ts = (TableScanOperator) op;
+        // push down projections.
+        ColumnProjectionUtils.appendReadColumns(
+            jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns());
+        // push down filters
+        pushFilters(jobConf, ts);
       }
     }
   }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java Mon Sep 23 20:40:54 2013
@@ -27,6 +27,7 @@ import java.io.IOException;
 import java.rmi.server.UID;
 import java.security.MessageDigest;
 import java.util.Arrays;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -1379,27 +1380,22 @@ public class RCFile {
       columnNumber = Integer.parseInt(metadata.get(
           new Text(COLUMN_NUMBER_METADATA_STR)).toString());
 
-      java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils
+      List<Integer> notSkipIDs = ColumnProjectionUtils
           .getReadColumnIDs(conf);
       boolean[] skippedColIDs = new boolean[columnNumber];
-      if (notSkipIDs.size() > 0) {
-        for (int i = 0; i < skippedColIDs.length; i++) {
-          skippedColIDs[i] = true;
-        }
+      if(ColumnProjectionUtils.isReadAllColumns(conf)) {
+        Arrays.fill(skippedColIDs, false);
+      } else if (notSkipIDs.size() > 0) {
+        Arrays.fill(skippedColIDs, true);
         for (int read : notSkipIDs) {
           if (read < columnNumber) {
             skippedColIDs[read] = false;
           }
         }
       } else {
-        // TODO: if no column name is specified e.g, in select count(1) from tt;
-        // skip all columns, this should be distinguished from the case:
-        // select * from tt;
-        for (int i = 0; i < skippedColIDs.length; i++) {
-          skippedColIDs[i] = false;
-        }
+        // select count(1)
+        Arrays.fill(skippedColIDs, true);
       }
-
       loadColumnNum = columnNumber;
       if (skippedColIDs.length > 0) {
         for (boolean skippedColID : skippedColIDs) {
@@ -1584,7 +1580,7 @@ public class RCFile {
         byte[] buffer = new byte[prefix+n];
         n = (int)Math.min(n, end - in.getPos());
         /* fill array with a pattern that will never match sync */
-        Arrays.fill(buffer, (byte)(~sync[0])); 
+        Arrays.fill(buffer, (byte)(~sync[0]));
         while(n > 0 && (in.getPos() + n) <= end) {
           position = in.getPos();
           in.readFully(buffer, prefix, n);
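
The first hunk above reworks how the RCFile reader builds its column skip mask: when the configuration says "read all columns" nothing is skipped, when an explicit projection exists everything is skipped except the listed ids, and when the projection is empty (select count(1)) every column is skipped. A compact plain-Java restatement of that decision, with invented class and method names:

    import java.util.Arrays;
    import java.util.List;

    final class SkipMask {
      static boolean[] build(boolean readAllColumns, List<Integer> notSkipIDs, int columnNumber) {
        boolean[] skippedColIDs = new boolean[columnNumber];
        if (readAllColumns) {
          Arrays.fill(skippedColIDs, false);       // read everything
        } else if (!notSkipIDs.isEmpty()) {
          Arrays.fill(skippedColIDs, true);        // skip all ...
          for (int read : notSkipIDs) {
            if (read < columnNumber) {
              skippedColIDs[read] = false;         // ... except the projected ids
            }
          }
        } else {
          Arrays.fill(skippedColIDs, true);        // no column needed, e.g. select count(1)
        }
        return skippedColIDs;
      }
    }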

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java Mon Sep 23 20:40:54 2013
@@ -19,14 +19,14 @@
 package org.apache.hadoop.hive.ql.io;
 
 import java.io.IOException;
-import java.util.Map;
-import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Map;
 import java.util.WeakHashMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer;
 import org.apache.hadoop.hive.ql.io.RCFile.Reader;
 import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
@@ -35,8 +35,6 @@ import org.apache.hadoop.mapred.FileSpli
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.util.ReflectionUtils;
 
-import org.apache.hadoop.hive.conf.HiveConf;
-
 /**
  * RCFileRecordReader.
  *
@@ -71,7 +69,7 @@ public class RCFileRecordReader<K extend
 
     public void put(FileSplit split, long endSync) {
       Path path = split.getPath();
-      long end = split.getStart() + split.getLength();       
+      long end = split.getStart() + split.getLength();
       String key = path.toString()+"+"+String.format("%d",end);
 
       RCFileSyncEntry entry = new RCFileSyncEntry();

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java Mon Sep 23 20:40:54 2013
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.io.avr
 
 
 import java.io.IOException;
+import java.rmi.server.UID;
 import java.util.Map;
 import java.util.Properties;
 
@@ -57,6 +58,10 @@ public class AvroGenericRecordReader imp
   final private long start;
   final private long stop;
   protected JobConf jobConf;
+  /**
+   * A unique ID for each record reader.
+   */
+  final private UID recordReaderID;
 
   public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
     this.jobConf = job;
@@ -78,6 +83,7 @@ public class AvroGenericRecordReader imp
     this.reader.sync(split.getStart());
     this.start = reader.tell();
     this.stop = split.getStart() + split.getLength();
+    this.recordReaderID = new UID();
   }
 
   /**
@@ -148,6 +154,7 @@ public class AvroGenericRecordReader imp
 
     GenericData.Record r = (GenericData.Record)reader.next();
     record.setRecord(r);
+    record.setRecordReaderID(recordReaderID);
 
     return true;
   }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Mon Sep 23 20:40:54 2013
@@ -20,9 +20,13 @@ package org.apache.hadoop.hive.ql.io.orc
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
-
-import com.google.common.collect.Lists;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -51,14 +55,6 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.StringUtils;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-
 /**
  * A MapReduce/Hive input format for ORC files.
  */
@@ -125,7 +121,7 @@ public class OrcInputFormat  implements 
       } else {
         LOG.info("No ORC pushdown predicate");
       }
-      this.reader = file.rows(offset, length,includeColumn, sarg, columnNames);
+      this.reader = file.rows(offset, length, includeColumn, sarg, columnNames);
       this.offset = offset;
       this.length = length;
     }
@@ -199,9 +195,7 @@ public class OrcInputFormat  implements 
    */
   static boolean[] findIncludedColumns(List<OrcProto.Type> types,
                                                Configuration conf) {
-    String includedStr =
-        conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
-    if (includedStr == null || includedStr.trim().length() == 0) {
+    if (ColumnProjectionUtils.isReadAllColumns(conf)) {
       return null;
     } else {
       int numColumns = types.size();
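
findIncludedColumns now consults ColumnProjectionUtils.isReadAllColumns and returns null in that case; the null travels into file.rows(...) in the first hunk, so "no include array" stands for "read every column". The plain-Java sketch below captures only that contract (names are invented, and real ORC columns form a type tree, so the actual method walks subtypes rather than a flat list):

    import java.util.List;

    final class IncludeMask {
      static boolean[] build(boolean readAllColumns, List<Integer> neededIds, int numColumns) {
        if (readAllColumns) {
          return null;                 // caller treats null as "include all columns"
        }
        boolean[] include = new boolean[numColumns];
        for (int id : neededIds) {
          if (id < numColumns) {
            include[id] = true;        // only the projected columns are decoded
          }
        }
        return include;
      }
    }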

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Mon Sep 23 20:40:54 2013
@@ -301,7 +301,7 @@ public final class ColumnPrunerProcFacto
           .genColLists((Operator<? extends OperatorDesc>) nd);
       cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd,
           cols);
-      ArrayList<Integer> needed_columns = new ArrayList<Integer>();
+      List<Integer> neededColumnIds = new ArrayList<Integer>();
       List<String> neededColumnNames = new ArrayList<String>();
       RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
       TableScanDesc desc = scanOp.getConf();
@@ -332,15 +332,15 @@ public final class ColumnPrunerProcFacto
           continue;
         }
         int position = inputRR.getPosition(cols.get(i));
-        if (position >=0) {
+        if (position >= 0) {
           // get the needed columns by id and name
-          needed_columns.add(position);
+          neededColumnIds.add(position);
           neededColumnNames.add(cols.get(i));
         }
       }
 
       desc.setVirtualCols(newVirtualCols);
-      scanOp.setNeededColumnIDs(needed_columns);
+      scanOp.setNeededColumnIDs(neededColumnIds);
       scanOp.setNeededColumns(neededColumnNames);
       return null;
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Mon Sep 23 20:40:54 2013
@@ -64,7 +64,6 @@ import org.apache.hadoop.hive.ql.plan.Op
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.StatsWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.InputFormat;
 
@@ -299,7 +298,7 @@ public class GenMRFileSink1 implements N
     // Create a TableScan operator
     RowSchema inputRS = fsInput.getSchema();
     Operator<? extends OperatorDesc> tsMerge =
-        OperatorFactory.get(TableScanDesc.class, inputRS);
+        GenMapRedUtils.createTemporaryTableScanOperator(inputRS);
 
     // Create a FileSink operator
     TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Mon Sep 23 20:40:54 2013
@@ -19,15 +19,12 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -41,12 +38,10 @@ import org.apache.hadoop.hive.ql.optimiz
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcFactory;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 
 /**
  * Processor for the rule - TableScan followed by Union.
@@ -131,33 +126,15 @@ public class GenMRUnion1 implements Node
     Context baseCtx = parseCtx.getContext();
     String taskTmpDir = baseCtx.getMRTmpFileURI();
 
-    // Create a file sink operator for this file name
-    Operator<? extends OperatorDesc> fs_op = OperatorFactory.get(
-        new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
-            HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());
-
-    assert parent.getChildOperators().size() == 1;
-    parent.getChildOperators().set(0, fs_op);
-
-    List<Operator<? extends OperatorDesc>> parentOpList =
-        new ArrayList<Operator<? extends OperatorDesc>>();
-    parentOpList.add(parent);
-    fs_op.setParentOperators(parentOpList);
-
-    // Create a dummy table scan operator
-    Operator<? extends OperatorDesc> ts_op = OperatorFactory.get(
-        new TableScanDesc(), parent.getSchema());
-    List<Operator<? extends OperatorDesc>> childOpList =
-        new ArrayList<Operator<? extends OperatorDesc>>();
-    childOpList.add(child);
-    ts_op.setChildOperators(childOpList);
-    child.replaceParent(parent, ts_op);
+    // Create the temporary file, its corresponding FileSinkOperator, and
+    // its corresponding TableScanOperator.
+    TableScanOperator tableScanOp =
+        GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
 
     // Add the path to alias mapping
-
     uCtxTask.addTaskTmpDir(taskTmpDir);
     uCtxTask.addTTDesc(tt_desc);
-    uCtxTask.addListTopOperators(ts_op);
+    uCtxTask.addListTopOperators(tableScanOp);
 
     // The union task is empty. The files created for all the inputs are
     // assembled in the union context and later used to initialize the union

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Sep 23 20:40:54 2013
@@ -34,12 +34,14 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.DemuxOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -864,6 +866,70 @@ public final class GenMapRedUtils {
     return op;
   }
 
+  public static TableScanOperator createTemporaryTableScanOperator(RowSchema rowSchema) {
+    TableScanOperator tableScanOp =
+        (TableScanOperator) OperatorFactory.get(new TableScanDesc(), rowSchema);
+    // Set needed columns for this dummy TableScanOperator
+    List<Integer> neededColumnIds = new ArrayList<Integer>();
+    List<String> neededColumnNames = new ArrayList<String>();
+    List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
+    for (int i = 0 ; i < parentColumnInfos.size(); i++) {
+      neededColumnIds.add(i);
+      neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
+    }
+    tableScanOp.setNeededColumnIDs(neededColumnIds);
+    tableScanOp.setNeededColumns(neededColumnNames);
+    return tableScanOp;
+  }
+
+  /**
+   * Break the pipeline between parent and child, and then
+   * output data generated by parent to a temporary file stored in taskTmpDir.
+   * A FileSinkOperator is added after parent to output the data.
+   * Before child, we add a TableScanOperator to load data stored in the temporary
+   * file back.
+   * @param parent
+   * @param child
+   * @param taskTmpDir
+   * @param tt_desc
+   * @param parseCtx
+   * @return The TableScanOperator inserted before child.
+   */
+  protected static TableScanOperator createTemporaryFile(
+      Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child,
+      String taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) {
+
+    // Create a FileSinkOperator for the file name of taskTmpDir
+    boolean compressIntermediate =
+        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
+    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
+    if (compressIntermediate) {
+      desc.setCompressCodec(parseCtx.getConf().getVar(
+          HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
+      desc.setCompressType(parseCtx.getConf().getVar(
+          HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
+    }
+    Operator<? extends OperatorDesc> fileSinkOp = putOpInsertMap(OperatorFactory
+        .get(desc, parent.getSchema()), null, parseCtx);
+
+    // Connect parent to fileSinkOp
+    parent.replaceChild(child, fileSinkOp);
+    fileSinkOp.setParentOperators(Utilities.makeList(parent));
+
+    // Create a dummy TableScanOperator for the file generated through fileSinkOp
+    RowResolver parentRowResolver =
+        parseCtx.getOpParseCtx().get(parent).getRowResolver();
+    TableScanOperator tableScanOp = (TableScanOperator) putOpInsertMap(
+        createTemporaryTableScanOperator(parent.getSchema()),
+        parentRowResolver, parseCtx);
+
+    // Connect this TableScanOperator to child.
+    tableScanOp.setChildOperators(Utilities.makeList(child));
+    child.replaceParent(parent, tableScanOp);
+
+    return tableScanOp;
+  }
+
   @SuppressWarnings("nls")
   /**
    * Split two tasks by creating a temporary file between them.
@@ -891,7 +957,7 @@ public final class GenMapRedUtils {
       rootTasks.remove(childTask);
     }
 
-    // generate the temporary file
+    // Generate the temporary file name
     Context baseCtx = parseCtx.getContext();
     String taskTmpDir = baseCtx.getMRTmpFileURI();
 
@@ -899,55 +965,18 @@ public final class GenMapRedUtils {
     TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
         .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
 
-    // Create a file sink operator for this file name
-    boolean compressIntermediate = parseCtx.getConf().getBoolVar(
-        HiveConf.ConfVars.COMPRESSINTERMEDIATE);
-    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc,
-        compressIntermediate);
-    if (compressIntermediate) {
-      desc.setCompressCodec(parseCtx.getConf().getVar(
-          HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
-      desc.setCompressType(parseCtx.getConf().getVar(
-          HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
-    }
-    Operator<? extends OperatorDesc> fs_op = putOpInsertMap(OperatorFactory
-        .get(desc, parent.getSchema()), null, parseCtx);
-
-    // replace the reduce child with this operator
-    List<Operator<? extends OperatorDesc>> childOpList = parent
-        .getChildOperators();
-    for (int pos = 0; pos < childOpList.size(); pos++) {
-      if (childOpList.get(pos) == op) {
-        childOpList.set(pos, fs_op);
-        break;
-      }
-    }
-
-    List<Operator<? extends OperatorDesc>> parentOpList =
-        new ArrayList<Operator<? extends OperatorDesc>>();
-    parentOpList.add(parent);
-    fs_op.setParentOperators(parentOpList);
-
-    // create a dummy tableScan operator on top of op
-    // TableScanOperator is implicitly created here for each MapOperator
-    RowResolver rowResolver = opProcCtx.getParseCtx().getOpParseCtx().get(parent).getRowResolver();
-    Operator<? extends OperatorDesc> ts_op = putOpInsertMap(OperatorFactory
-        .get(TableScanDesc.class, parent.getSchema()), rowResolver, parseCtx);
-
-    childOpList = new ArrayList<Operator<? extends OperatorDesc>>();
-    childOpList.add(op);
-    ts_op.setChildOperators(childOpList);
-    op.getParentOperators().set(0, ts_op);
+    // Create the temporary file, its corresponding FileSinkOperator, and
+    // its corresponding TableScanOperator.
+    TableScanOperator tableScanOp =
+        createTemporaryFile(parent, op, taskTmpDir, tt_desc, parseCtx);
 
     Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx =
         opProcCtx.getMapCurrCtx();
-    mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null));
+    mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
 
     String streamDesc = taskTmpDir;
     MapredWork cplan = (MapredWork) childTask.getWork();
 
-    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
-
     if (needsTagging(cplan.getReduceWork())) {
       String origStreamDesc;
       streamDesc = "$INTNAME";
@@ -963,7 +992,7 @@ public final class GenMapRedUtils {
     }
 
     // Add the path to alias mapping
-    setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan.getMapWork(), false, tt_desc);
+    setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
     opProcCtx.setCurrTopOp(null);
     opProcCtx.setCurrAliasId(null);
     opProcCtx.setCurrTask(childTask);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Mon Sep 23 20:40:54 2013
@@ -61,9 +61,7 @@ public class Optimizer {
         transformations.add(new ListBucketingPruner());
       }
     }
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
-      transformations.add(new ColumnPruner());
-    }
+    transformations.add(new ColumnPruner());
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) {
       transformations.add(new SkewJoinOptimizer());
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java Mon Sep 23 20:40:54 2013
@@ -27,6 +27,7 @@ import java.util.Map;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin;
@@ -54,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.Op
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
@@ -157,6 +158,7 @@ public final class GenMRSkewJoinProcesso
         .getProperties());
 
     Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
+    Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
     Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
     Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
     // used for create mapJoinDesc, should be in order
@@ -174,13 +176,17 @@ public final class GenMRSkewJoinProcesso
       int columnSize = valueCols.size();
       List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
       List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
+      ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
 
       boolean first = true;
       for (int k = 0; k < columnSize; k++) {
         TypeInfo type = valueCols.get(k).getTypeInfo();
         String newColName = i + "_VALUE_" + k; // any name, it does not matter.
-        newValueExpr
-            .add(new ExprNodeColumnDesc(type, newColName, "" + i, false));
+        ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
+        columnInfos.add(columnInfo);
+        newValueExpr.add(new ExprNodeColumnDesc(
+            columnInfo.getType(), columnInfo.getInternalName(),
+            columnInfo.getTabAlias(), false));
         if (!first) {
           colNames = colNames + ",";
           colTypes = colTypes + ",";
@@ -199,14 +205,18 @@ public final class GenMRSkewJoinProcesso
         first = false;
         colNames = colNames + joinKeys.get(k);
         colTypes = colTypes + joinKeyTypes.get(k);
-        newKeyExpr.add(new ExprNodeColumnDesc(TypeInfoFactory
-            .getPrimitiveTypeInfo(joinKeyTypes.get(k)), joinKeys.get(k),
-            "" + i, false));
+        ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory
+            .getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
+        columnInfos.add(columnInfo);
+        newKeyExpr.add(new ExprNodeColumnDesc(
+            columnInfo.getType(), columnInfo.getInternalName(),
+            columnInfo.getTabAlias(), false));
       }
 
       newJoinValues.put(alias, newValueExpr);
       newJoinKeys.put(alias, newKeyExpr);
       tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
+      rowSchemaList.put(alias, new RowSchema(columnInfos));
 
       // construct value table Desc
       String valueColNames = "";
@@ -243,8 +253,8 @@ public final class GenMRSkewJoinProcesso
 
       Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
       for (int k = 0; k < tags.length; k++) {
-        Operator<? extends OperatorDesc> ts = OperatorFactory.get(
-            TableScanDesc.class, (RowSchema) null);
+        Operator<? extends OperatorDesc> ts =
+            GenMapRedUtils.createTemporaryTableScanOperator(rowSchemaList.get((byte)k));
         ((TableScanOperator)ts).setTableDesc(tableDescList.get((byte)k));
         parentOps[k] = ts;
       }
@@ -256,13 +266,9 @@ public final class GenMRSkewJoinProcesso
       String bigKeyDirPath = bigKeysDirMap.get(src);
       newPlan.getPathToAliases().put(bigKeyDirPath, aliases);
 
-
-
-
       newPlan.getAliasToWork().put(alias, tblScan_op);
       PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);
 
-
       newPlan.getPathToPartitionInfo().put(bigKeyDirPath, part);
       newPlan.getAliasToPartnInfo().put(alias, part);
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java Mon Sep 23 20:40:54 2013
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.NullStructSerDe;
 
 /**
@@ -128,12 +129,15 @@ public class MetadataOnlyOptimizer imple
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       TableScanOperator node = (TableScanOperator) nd;
+      TableScanOperator tsOp = (TableScanOperator) nd;
       WalkerCtx walkerCtx = (WalkerCtx) procCtx;
-      if (((node.getNeededColumnIDs() == null) || (node.getNeededColumnIDs().size() == 0))
-          && ((node.getConf() == null) ||
-              (node.getConf().getVirtualCols() == null) ||
-              (node.getConf().getVirtualCols().isEmpty()))) {
-        walkerCtx.setMayBeMetadataOnly(node);
+      List<Integer> colIDs = tsOp.getNeededColumnIDs();
+      TableScanDesc desc = tsOp.getConf();
+      boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
+      boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
+                             || (desc.getVirtualCols().isEmpty());
+      if (noColNeeded && noVCneeded) {
+        walkerCtx.setMayBeMetadataOnly(tsOp);
       }
       return nd;
     }

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java Mon Sep 23 20:40:54 2013
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.io;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.IOException;
 import java.util.Random;
 
@@ -32,7 +34,6 @@ import org.apache.hadoop.io.SequenceFile
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
-import static org.junit.Assert.*;
 
 /**
  * PerformTestRCFileAndSeqFile.
@@ -300,7 +301,7 @@ public class PerformTestRCFileAndSeqFile
 
     java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
     readCols.add(Integer.valueOf(0));
-    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
+    ColumnProjectionUtils.appendReadColumns(conf, readCols);
     RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
 
     LongWritable rowID = new LongWritable();
@@ -335,7 +336,7 @@ public class PerformTestRCFileAndSeqFile
     java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
     readCols.add(Integer.valueOf(0));
     readCols.add(Integer.valueOf(allColumnsNumber - 1));
-    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
+    ColumnProjectionUtils.appendReadColumns(conf, readCols);
     RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
 
     LongWritable rowID = new LongWritable();
@@ -370,7 +371,7 @@ public class PerformTestRCFileAndSeqFile
 
     int actualReadCount = 0;
 
-    ColumnProjectionUtils.setFullyReadColumns(conf);
+    ColumnProjectionUtils.setReadAllColumns(conf);
     RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
 
     LongWritable rowID = new LongWritable();
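
The tests migrate to the renamed ColumnProjectionUtils entry points: setReadAllColumns replaces setFullyReadColumns, and appendReadColumns replaces setReadColumnIDs, the "append" signalling that repeated calls add to the projection rather than replace it. A small stand-alone usage sketch of those calls (the column ids are made up, and the exact ordering or de-duplication of the merged list is left to the implementation):

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    public class ProjectionConfExample {
      public static void main(String[] args) {
        // Readers that need every column now have to say so explicitly.
        Configuration readAll = new Configuration();
        ColumnProjectionUtils.setReadAllColumns(readAll);
        System.out.println(ColumnProjectionUtils.isReadAllColumns(readAll)); // true

        // Projections accumulate: two callers sharing one conf each append
        // the column ids they need.
        Configuration projected = new Configuration();
        ColumnProjectionUtils.appendReadColumns(projected, Arrays.asList(0, 2));
        ColumnProjectionUtils.appendReadColumns(projected, Arrays.asList(5));
        System.out.println(ColumnProjectionUtils.getReadColumnIDs(projected)); // contains 0, 2 and 5
      }
    }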

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java Mon Sep 23 20:40:54 2013
@@ -18,6 +18,14 @@
 
 package org.apache.hadoop.hive.ql.io;
 
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
 import java.io.DataInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -28,8 +36,6 @@ import java.util.List;
 import java.util.Properties;
 import java.util.Random;
 
-import static org.junit.Assert.*;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -58,11 +64,11 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.FileSplit;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -83,20 +89,21 @@ public class TestRCFile {
 
   // Data
 
-  private Writable[] expectedFieldsData = {
+  private final Writable[] expectedFieldsData = {
       new ByteWritable((byte) 123), new ShortWritable((short) 456),
       new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
       new Text("hive and hadoop"), null, null};
 
-  private Object[] expectedPartitalFieldsData = {null, null,
+  private final Object[] expectedPartitalFieldsData = {null, null,
       new IntWritable(789), new LongWritable(1000), null, null, null, null};
-  private BytesRefArrayWritable patialS = new BytesRefArrayWritable();
+  private final BytesRefArrayWritable patialS = new BytesRefArrayWritable();
   private byte[][] bytesArray;
   private BytesRefArrayWritable s;
 
   @Before
   public void setup() throws Exception {
     conf = new Configuration();
+    ColumnProjectionUtils.setReadAllColumns(conf);
     fs = FileSystem.getLocal(conf);
     dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred");
     file = new Path(dir, "test_rcfile");
@@ -511,7 +518,7 @@ public class TestRCFile {
       throws IOException, SerDeException {
     LOG.debug("reading " + count + " records");
     long start = System.currentTimeMillis();
-    ColumnProjectionUtils.setFullyReadColumns(conf);
+    ColumnProjectionUtils.setReadAllColumns(conf);
     RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
 
     LongWritable rowID = new LongWritable();
@@ -556,7 +563,7 @@ public class TestRCFile {
     java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
     readCols.add(Integer.valueOf(2));
     readCols.add(Integer.valueOf(3));
-    ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
+    ColumnProjectionUtils.appendReadColumns(conf, readCols);
     RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
 
     LongWritable rowID = new LongWritable();

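The TestRCFile hunks above swap the old projection helpers (setFullyReadColumns, setReadColumnIDs) for the renamed ColumnProjectionUtils entry points. A minimal sketch of the resulting setup, using only the two methods that actually appear in the diff; the class name ColumnProjectionSketch is illustrative only:

// Minimal sketch (not part of the commit): drives the ColumnProjectionUtils
// calls that replace setFullyReadColumns/setReadColumnIDs in the hunks above.
// Only setReadAllColumns and appendReadColumns are confirmed by the diff.
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class ColumnProjectionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Formerly: ColumnProjectionUtils.setReadColumnIDs(conf, readCols)
    // Now: append the projected column ids through the utility class.
    List<Integer> readCols = Arrays.asList(Integer.valueOf(2), Integer.valueOf(3));
    ColumnProjectionUtils.appendReadColumns(conf, readCols);

    // Formerly: ColumnProjectionUtils.setFullyReadColumns(conf)
    // Now: mark the configuration as reading every column.
    ColumnProjectionUtils.setReadAllColumns(conf);
  }
}

As the name suggests, appendReadColumns adds to whatever ids are already recorded in the Configuration rather than replacing them, which is presumably why the test also calls setReadAllColumns on the fresh conf built in setup().
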
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Mon Sep 23 20:40:54 2013
@@ -17,6 +17,20 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -30,6 +44,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -54,21 +69,6 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
 public class TestInputOutputFormat {
 
   Path workDir = new Path(System.getProperty("test.tmp.dir","target/test/tmp"));
@@ -574,7 +574,7 @@ public class TestInputOutputFormat {
     reader.close();
 
     // read just the first column
-    conf.set("hive.io.file.readcolumn.ids", "0");
+    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
     reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
     key = reader.createKey();
     value = (Writable) reader.createValue();
@@ -589,7 +589,7 @@ public class TestInputOutputFormat {
     reader.close();
 
     // test the mapping of empty string to all columns
-    conf.set("hive.io.file.readcolumn.ids", "");
+    ColumnProjectionUtils.setReadAllColumns(conf);
     reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
     key = reader.createKey();
     value = (Writable) reader.createValue();
@@ -655,7 +655,7 @@ public class TestInputOutputFormat {
     FileInputFormat.setInputPaths(conf, testFilePath.toString());
     InputSplit[] splits = in.getSplits(conf, 1);
     assertEquals(1, splits.length);
-    conf.set("hive.io.file.readcolumn.ids", "1");
+    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
     org.apache.hadoop.mapred.RecordReader reader =
         in.getRecordReader(splits[0], conf, Reporter.NULL);
     Object key = reader.createKey();

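The same migration shows up in the ORC input/output test: raw writes to hive.io.file.readcolumn.ids are replaced by the utility calls. A small sketch under the same assumption (only the calls shown in the hunks are real; the helper configureProjection is hypothetical):

// Hedged sketch: replaces conf.set("hive.io.file.readcolumn.ids", ...) style
// configuration with the ColumnProjectionUtils calls visible in the hunk above.
import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class OrcProjectionSketch {
  // Hypothetical helper mirroring the two cases exercised by the test.
  static void configureProjection(Configuration conf, boolean readAll) {
    if (readAll) {
      // Mapping of the former empty-string value: read every column.
      ColumnProjectionUtils.setReadAllColumns(conf);
    } else {
      // Project a single column, as the record-reader test does for column 0.
      ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
    }
  }
}
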
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q Mon Sep 23 20:40:54 2013
@@ -1,7 +1,7 @@
 set hive.archive.enabled = true;
 set hive.enforce.bucketing = true;
 
--- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
 
 drop table tstsrc;
 drop table tstsrcpart;

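The archive test now excludes only Hadoop 0.20, so the 0.20S (security) line runs it again. As a rough illustration of what the EXCLUDE_HADOOP_MAJOR_VERSIONS marker expresses (the real harness logic lives in the q-file test driver and is not part of this diff; the parser below is entirely hypothetical):

// Hypothetical sketch only: shows the intent of the marker comment, not the
// actual test-harness implementation.
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExcludeMarkerSketch {
  private static final Pattern MARKER =
      Pattern.compile("EXCLUDE_HADOOP_MAJOR_VERSIONS\\(([^)]*)\\)");

  // Returns true if the query file should be skipped for the given Hadoop version.
  static boolean excluded(String qFileText, String hadoopMajorVersion) {
    Matcher m = MARKER.matcher(qFileText);
    if (!m.find()) {
      return false;
    }
    List<String> versions = Arrays.asList(m.group(1).split("\\s*,\\s*"));
    return versions.contains(hadoopMajorVersion);
  }

  public static void main(String[] args) {
    // After this change the marker lists only 0.20, so 0.20S no longer skips.
    String header = "-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)";
    System.out.println(excluded(header, "0.20"));   // true
    System.out.println(excluded(header, "0.20S"));  // false
  }
}
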
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q Mon Sep 23 20:40:54 2013
@@ -3,6 +3,7 @@ drop table ba_test;
 -- Everything in ba_table1.q + columnar serde in RCFILE.
 
 create table ba_test (ba_key binary, ba_val binary) stored as rcfile;
+alter table ba_test set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
 
 describe extended ba_test;
 

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out Mon Sep 23 20:40:54 2013
@@ -35,11 +35,13 @@ POSTHOOK: Lineage: tstsrc.value SIMPLE [
 PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11')
 select key, value from srcpart where ds='2008-04-08' and hr='11'
 PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11
 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11')
 select key, value from srcpart where ds='2008-04-08' and hr='11'
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11
 POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -49,11 +51,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12')
 select key, value from srcpart where ds='2008-04-08' and hr='12'
 PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12
 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12')
 select key, value from srcpart where ds='2008-04-08' and hr='12'
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12
 POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -65,11 +69,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11')
 select key, value from srcpart where ds='2008-04-09' and hr='11'
 PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11
 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11')
 select key, value from srcpart where ds='2008-04-09' and hr='11'
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11
 POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -83,11 +89,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12')
 select key, value from srcpart where ds='2008-04-09' and hr='12'
 PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12
 POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12')
 select key, value from srcpart where ds='2008-04-09' and hr='12'
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12
 POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -103,12 +111,14 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
 PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
@@ -144,12 +154,14 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
 PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
@@ -166,10 +178,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 48479881068
 PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key
 PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -187,12 +201,14 @@ PREHOOK: query: SELECT * FROM tstsrcpart
 WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tstsrc
+PREHOOK: Input: default@tstsrcpart
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key
 WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tstsrc
+POSTHOOK: Input: default@tstsrcpart
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -235,12 +251,14 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
 PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
@@ -295,10 +313,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
 PREHOOK: type: QUERY
+PREHOOK: Input: default@harbucket
 PREHOOK: Input: default@harbucket@ds=1
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@harbucket
 POSTHOOK: Input: default@harbucket@ds=1
 #### A masked pattern was here ####
 POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -339,10 +359,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
 PREHOOK: type: QUERY
+PREHOOK: Input: default@harbucket
 PREHOOK: Input: default@harbucket@ds=1
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@harbucket
 POSTHOOK: Input: default@harbucket@ds=1
 #### A masked pattern was here ####
 POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -383,10 +405,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
 PREHOOK: type: QUERY
+PREHOOK: Input: default@harbucket
 PREHOOK: Input: default@harbucket@ds=1
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@harbucket
 POSTHOOK: Input: default@harbucket@ds=1
 #### A masked pattern was here ####
 POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -467,11 +491,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2
 PREHOOK: type: QUERY
+PREHOOK: Input: default@old_name
 PREHOOK: Input: default@old_name@ds=1
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@old_name
 POSTHOOK: Input: default@old_name@ds=1
 #### A masked pattern was here ####
 POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -511,11 +537,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
 PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2
 PREHOOK: type: QUERY
+PREHOOK: Input: default@new_name
 PREHOOK: Input: default@new_name@ds=1
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
 FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2
 POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_name
 POSTHOOK: Input: default@new_name@ds=1
 #### A masked pattern was here ####
 POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out Mon Sep 23 20:40:54 2013
@@ -154,6 +154,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out Mon Sep 23 20:40:54 2013
@@ -126,6 +126,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out Mon Sep 23 20:40:54 2013
@@ -101,6 +101,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
         $INTNAME 
+          TableScan
             Reduce Output Operator
               key expressions:
                     expr: _col0
@@ -116,6 +117,7 @@ STAGE PLANS:
                     expr: _col1
                     type: bigint
         $INTNAME1 
+          TableScan
             Reduce Output Operator
               key expressions:
                     expr: _col0
@@ -167,6 +169,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out Mon Sep 23 20:40:54 2013
@@ -109,6 +109,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
         $INTNAME 
+          TableScan
             Reduce Output Operator
               key expressions:
                     expr: _col0
@@ -126,6 +127,7 @@ STAGE PLANS:
                     expr: _col2
                     type: bigint
         $INTNAME1 
+          TableScan
             Reduce Output Operator
               key expressions:
                     expr: _col0
@@ -179,6 +181,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out Mon Sep 23 20:40:54 2013
@@ -184,6 +184,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1
@@ -213,7 +214,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3))
 from (
 SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 
@@ -419,6 +419,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1
@@ -448,7 +449,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3))
 from (
 SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3  

Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out Mon Sep 23 20:40:54 2013
@@ -201,6 +201,7 @@ STAGE PLANS:
     Map Reduce
       Alias -> Map Operator Tree:
 #### A masked pattern was here ####
+          TableScan
             Reduce Output Operator
               sort order: 
               tag: -1