You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2011/02/07 20:58:44 UTC
svn commit: r1068083 [1/5] - in /hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/h...

Author: heyongqiang
Date: Mon Feb  7 19:58:43 2011
New Revision: 1068083

URL: http://svn.apache.org/viewvc?rev=1068083&view=rev
Log:
HIVE-1900 a mapper should be able to span multiple partitions (namit via He Yongqiang)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q
    hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q
    hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q
    hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out
    hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out
    hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out
Modified:
    hive/trunk/CHANGES.txt
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
    hive/trunk/ql/src/test/results/clientpositive/input42.q.out
    hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out
    hive/trunk/ql/src/test/results/compiler/plan/case_sensitivity.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input1.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input3.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input7.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input9.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input_testsequencefile.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample2.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample3.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample5.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample6.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/sample7.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/subq.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/union.q.xml

Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Mon Feb  7 19:58:43 2011
@@ -175,6 +175,9 @@ Trunk -  Unreleased
     HIVE-1961 Make Stats gathering more flexible with timeout and atomicity
     (Ning Zhang via namit)
 
+    HIVE-1900 a mapper should be able to span multiple partitions
+    (namit via He Yongqiang)
+
   IMPROVEMENTS
 
     HIVE-1235 use Ivy for fetching HBase dependencies (John Sichi via cws)

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Mon Feb  7 19:58:43 2011
@@ -373,6 +373,11 @@ public class HiveConf extends Configurat
     HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false),
 
     HIVE_INDEX_IGNORE_HDFS_LOC("hive.index.compact.file.ignore.hdfs", false),
+
+    // Temporary variable for testing. It exists only so that this feature can be turned off if a bug
+    // shows up in deployment. It is intentionally not documented in hive-default.xml, and it should be
+    // removed once the feature is stable.
+    HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS("hive.mapper.cannot.span.multiple.partitions", false),
     ;
 
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java Mon Feb  7 19:58:43 2011
@@ -107,6 +107,7 @@ public class ExecMapperContext {
   }
 
   public String getCurrentInputFile() {
+    currentInputFile = this.ioCxt.getInputFile();
     return currentInputFile;
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Mon Feb  7 19:58:43 2011
@@ -194,6 +194,23 @@ public class MapJoinOperator extends Abs
     }
   }
 
+  // Load the hash table
+  @Override
+  public void cleanUpInputFileChangedOp() throws HiveException {
+    try {
+      if (firstRow) {
+        // generate the map metadata
+        generateMapMetaData();
+        firstRow = false;
+      }
+
+      loadHashTable();
+    } catch (SerDeException e) {
+      e.printStackTrace();
+      throw new HiveException(e);
+    }
+  }
+
   @Override
   public void processOp(Object row, int tag) throws HiveException {
 
@@ -203,9 +220,6 @@ public class MapJoinOperator extends Abs
         generateMapMetaData();
         firstRow = false;
       }
-      if (this.getExecContext().inputFileChanged()) {
-        loadHashTable();
-      }
 
       // get alias
       alias = order[tag];

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java Mon Feb  7 19:58:43 2011
@@ -25,6 +25,8 @@ import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
 import java.util.Map.Entry;
 import java.util.Properties;
 
@@ -79,6 +81,7 @@ public class MapOperator extends Operato
   private transient boolean isPartitioned;
   private transient boolean hasVC;
   private Map<MapInputPath, MapOpCtx> opCtxMap;
+  private Set<MapInputPath> listInputPaths = new HashSet<MapInputPath>();
 
   private Map<Operator<? extends Serializable>, java.util.ArrayList<String>> operatorToPaths;
 
@@ -121,6 +124,15 @@ public class MapOperator extends Operato
     public int hashCode() {
       return (op == null) ? 0 : op.hashCode();
     }
+
+    public Operator<? extends Serializable> getOp() {
+      return op;
+    }
+
+    public void setOp(Operator<? extends Serializable> op) {
+      this.op = op;
+    }
+
   }
 
   private static class MapOpCtx {
@@ -271,10 +283,70 @@ public class MapOperator extends Operato
     return opCtx;
   }
 
+  /**
+   * Set the inspectors for the given input. Since a mapper can span multiple partitions, the
+   * inspectors need to be changed whenever the input changes.
+   **/
+  private void setInspectorInput(MapInputPath inp) {
+    Operator<? extends Serializable> op = inp.getOp();
+
+    deserializer  = opCtxMap.get(inp).getDeserializer();
+    isPartitioned = opCtxMap.get(inp).isPartitioned();
+    rowWithPart   = opCtxMap.get(inp).getRowWithPart();
+    rowObjectInspector = opCtxMap.get(inp).getRowObjectInspector();
+    if (listInputPaths.contains(inp)) {
+      return;
+    }
+
+    listInputPaths.add(inp);
+    StructObjectInspector rawRowObjectInspector = opCtxMap.get(inp).rawRowObjectInspector;
+    StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector;
+    if (op instanceof TableScanOperator) {
+      TableScanOperator tsOp = (TableScanOperator) op;
+      TableScanDesc tsDesc = tsOp.getConf();
+      if(tsDesc != null) {
+        this.vcs = tsDesc.getVirtualCols();
+        if (vcs != null && vcs.size() > 0) {
+          this.hasVC = true;
+          List<String> vcNames = new ArrayList<String>(vcs.size());
+          this.vcValues = new Writable[vcs.size()];
+          List<ObjectInspector> vcsObjectInspectors = new ArrayList<ObjectInspector>(vcs.size());
+          for (int i = 0; i < vcs.size(); i++) {
+            VirtualColumn vc = vcs.get(i);
+            vcsObjectInspectors.add(
+              PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+                ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory()));
+            vcNames.add(vc.getName());
+          }
+          StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory
+            .getStandardStructObjectInspector(vcNames,
+                                              vcsObjectInspectors);
+          if (isPartitioned) {
+            this.rowWithPartAndVC = new Object[3];
+            this.rowWithPartAndVC[1] = this.rowWithPart[1];
+          } else {
+            this.rowWithPartAndVC = new Object[2];
+          }
+          if(partObjectInspector == null) {
+            this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
+                                        .asList(new StructObjectInspector[] {
+                                            rowObjectInspector, vcStructObjectInspector }));
+          } else {
+            this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
+                                        .asList(new StructObjectInspector[] {
+                                            rawRowObjectInspector, partObjectInspector, vcStructObjectInspector }));
+          }
+          opCtxMap.get(inp).rowObjectInspector = this.rowObjectInspector;
+        }
+      }
+    }
+  }
+
   public void setChildren(Configuration hconf) throws HiveException {
 
     Path fpath = new Path((new Path(HiveConf.getVar(hconf,
         HiveConf.ConfVars.HADOOPMAPFILENAME))).toUri().getPath());
+
     ArrayList<Operator<? extends Serializable>> children = new ArrayList<Operator<? extends Serializable>>();
     opCtxMap = new HashMap<MapInputPath, MapOpCtx>();
     operatorToPaths = new HashMap<Operator<? extends Serializable>, java.util.ArrayList<String>>();
@@ -311,51 +383,7 @@ public class MapOperator extends Operato
             LOG.info("dump " + op.getName() + " "
                 + opCtxMap.get(inp).getRowObjectInspector().getTypeName());
             if (!done) {
-              deserializer = opCtxMap.get(inp).getDeserializer();
-              isPartitioned = opCtxMap.get(inp).isPartitioned();
-              rowWithPart = opCtxMap.get(inp).getRowWithPart();
-              rowObjectInspector = opCtxMap.get(inp).getRowObjectInspector();
-              StructObjectInspector rawRowObjectInspector = opCtxMap.get(inp).rawRowObjectInspector;
-              StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector;
-              if (op instanceof TableScanOperator) {
-                TableScanOperator tsOp = (TableScanOperator) op;
-                TableScanDesc tsDesc = tsOp.getConf();
-                if(tsDesc != null) {
-                  this.vcs = tsDesc.getVirtualCols();
-                  if (vcs != null && vcs.size() > 0) {
-                    this.hasVC = true;
-                    List<String> vcNames = new ArrayList<String>(vcs.size());
-                    this.vcValues = new Writable[vcs.size()];
-                    List<ObjectInspector> vcsObjectInspectors = new ArrayList<ObjectInspector>(vcs.size());
-                    for (int i = 0; i < vcs.size(); i++) {
-                      VirtualColumn vc = vcs.get(i);
-                      vcsObjectInspectors.add(
-                          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
-                              ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory()));
-                      vcNames.add(vc.getName());
-                    }
-                    StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory
-                        .getStandardStructObjectInspector(vcNames,
-                            vcsObjectInspectors);
-                    if (isPartitioned) {
-                      this.rowWithPartAndVC = new Object[3];
-                      this.rowWithPartAndVC[1] = this.rowWithPart[1];
-                    } else {
-                      this.rowWithPartAndVC = new Object[2];
-                    }
-                    if(partObjectInspector == null) {
-                      this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
-                          .asList(new StructObjectInspector[] {
-                              rowObjectInspector, vcStructObjectInspector }));
-                    } else {
-                      this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
-                          .asList(new StructObjectInspector[] {
-                              rawRowObjectInspector, partObjectInspector, vcStructObjectInspector }));
-                    }
-                    opCtxMap.get(inp).rowObjectInspector = this.rowObjectInspector;
-                  }
-                }
-              }
+              setInspectorInput(inp);
               done = true;
             }
           }
@@ -430,7 +458,41 @@ public class MapOperator extends Operato
     }
   }
 
+  // Change the deserializer etc. since this is a new file; a split can span
+  // multiple files/partitions.
+  public void cleanUpInputFileChangedOp() throws HiveException {
+    Path fpath = new Path((new Path(this.getExecContext().getCurrentInputFile()))
+                          .toUri().getPath());
+
+    for (String onefile : conf.getPathToAliases().keySet()) {
+      Path onepath = new Path(new Path(onefile).toUri().getPath());
+      // check for the operators who will process rows coming to this Map
+      // Operator
+      if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) {
+        String onealias = conf.getPathToAliases().get(onefile).get(0);
+        Operator<? extends Serializable> op =
+          conf.getAliasToWork().get(onealias);
+
+        LOG.info("Processing alias " + onealias + " for file " + onefile);
+
+        MapInputPath inp = new MapInputPath(onefile, onealias, op);
+        setInspectorInput(inp);
+        break;
+      }
+    }
+  }
+
   public void process(Writable value) throws HiveException {
+    // A mapper can span multiple files/partitions.
+    // The deserializers need to be reset if the input file has changed.
+    if ((this.getExecContext() != null) &&
+        this.getExecContext().inputFileChanged()) {
+      LOG.info("Processing path " + this.getExecContext().getCurrentInputFile());
+
+      // Let this operator and its child operators clean up now that the input file has changed
+      cleanUpInputFileChanged();
+    }
+
     Object row = null;
     try {
       if (this.hasVC) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Mon Feb  7 19:58:43 2011
@@ -1236,4 +1236,22 @@ public abstract class Operator<T extends
       }
     }
   }
+
+  // The input file has changed - every operator can invoke specific action
+  // for each input file
+  public void cleanUpInputFileChanged() throws HiveException {
+    this.cleanUpInputFileChangedOp();
+    if(this.childOperators != null) {
+      for (int i = 0; i<this.childOperators.size();i++) {
+        Operator<? extends Serializable> op = this.childOperators.get(i);
+        op.cleanUpInputFileChanged();
+      }
+    }
+  }
+
+  // If an operator needs to perform specific cleanup when the input file changes,
+  // it can override this method.
+  public void cleanUpInputFileChangedOp() throws HiveException {
+  }
+
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Mon Feb  7 19:58:43 2011
@@ -67,6 +67,7 @@ public class SMBMapJoinOperator extends 
   private transient boolean[] fetchOpDone;
   private transient boolean[] foundNextKeyGroup;
   transient boolean firstFetchHappened = false;
+  private transient boolean inputFileChanged = false;
   transient boolean localWorkInited = false;
 
   public SMBMapJoinOperator() {
@@ -85,6 +86,7 @@ public class SMBMapJoinOperator extends 
     closeCalled = false;
 
     this.firstFetchHappened = false;
+    this.inputFileChanged = false;
 
     // get the largest table alias from order
     int maxAlias = 0;
@@ -177,11 +179,17 @@ public class SMBMapJoinOperator extends 
     }
   }
 
+  // The input file has changed - record it so the fetch operators are set up for the new file later
+  @Override
+  public void cleanUpInputFileChangedOp() throws HiveException {
+    inputFileChanged = true;
+  }
+
   @Override
   public void processOp(Object row, int tag) throws HiveException {
 
     if (tag == posBigTable) {
-      if (this.getExecContext().inputFileChanged()) {
+      if (inputFileChanged) {
         if (firstFetchHappened) {
           // we need to first join and flush out data left by the previous file.
           joinFinalLeftData();
@@ -194,6 +202,7 @@ public class SMBMapJoinOperator extends 
           setUpFetchOpContext(fetchOp, alias);
         }
         firstFetchHappened = false;
+        inputFileChanged = false;
       }
     }
 
@@ -531,8 +540,7 @@ public class SMBMapJoinOperator extends 
     }
     closeCalled = true;
 
-    if ((this.getExecContext() != null && this.getExecContext().inputFileChanged())
-        || !firstFetchHappened) {
+    if (inputFileChanged || !firstFetchHappened) {
       //set up the fetch operator for the new input file.
       for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
         String alias = entry.getKey();
@@ -546,6 +554,7 @@ public class SMBMapJoinOperator extends 
           fetchNextGroup(t);
         }
       }
+      inputFileChanged = false;
     }
 
     joinFinalLeftData();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Mon Feb  7 19:58:43 2011
@@ -51,6 +51,7 @@ public class TableScanOperator extends O
   private transient Configuration hconf;
   private transient Stat stat;
   private transient String partitionSpecs;
+  private transient boolean inputFileChanged = false;
   private TableDesc tableDesc;
 
 
@@ -77,8 +78,16 @@ public class TableScanOperator extends O
     forward(row, inputObjInspectors[tag]);
   }
 
+  // Change the table partition for collecting stats
+  @Override
+  public void cleanUpInputFileChangedOp() throws HiveException {
+    inputFileChanged = true;
+  }
+
   private void gatherStats(Object row) {
-    if (stat == null) { // first row/call
+    // first row/call or a new partition
+    if ((stat == null) || inputFileChanged) {
+      inputFileChanged = false;
       stat = new Stat();
       if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) {
         partitionSpecs = "";
@@ -122,6 +131,8 @@ public class TableScanOperator extends O
   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
     initializeChildren(hconf);
+    inputFileChanged = false;
+
     if (conf == null) {
       return;
     }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Mon Feb  7 19:58:43 2011
@@ -22,9 +22,12 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.File;
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.LinkedList;
+import java.util.List;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
@@ -50,9 +53,7 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.TextInputFormat;
-
-
-
+import org.apache.hadoop.hive.ql.exec.Operator;
 
 /**
  * CombineHiveInputFormat is a parameterized InputFormat which looks at the path
@@ -211,7 +212,37 @@ public class CombineHiveInputFormat<K ex
       out.writeUTF(inputFormatClassName);
     }
   }
-  
+
+  // Splits are not shared across different partitions with different input formats.
+  // For example, 2 partitions (1 sequencefile and 1 rcfile) will have 2 different splits
+  private static class CombinePathInputFormat {
+    private List<Operator<? extends Serializable>> opList;
+    private String inputFormatClassName;
+
+    public CombinePathInputFormat(List<Operator<? extends Serializable>> opList,
+                                  String inputFormatClassName) {
+      this.opList = opList;
+      this.inputFormatClassName = inputFormatClassName;
+    }
+
+    public boolean equals(Object o) {
+      if (o instanceof CombinePathInputFormat) {
+        CombinePathInputFormat mObj = (CombinePathInputFormat)o;
+        if (mObj == null) {
+          return false;
+        }
+        return opList.equals(mObj.opList) &&
+          inputFormatClassName.equals(mObj.inputFormatClassName);
+      }
+      return false;
+    }
+
+    @Override
+    public int hashCode() {
+      return (opList == null) ? 0 : opList.hashCode();
+    }
+  }
+
   /**
    * Create Hive splits based on CombineFileSplit.
    */
@@ -219,6 +250,9 @@ public class CombineHiveInputFormat<K ex
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
 
     init(job);
+    Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+    Map<String, Operator<? extends Serializable>> aliasToWork =
+      mrwork.getAliasToWork();
     CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
         .getCombineFileInputFormat();
 
@@ -234,7 +268,10 @@ public class CombineHiveInputFormat<K ex
     // combine splits only from same tables and same partitions. Do not combine splits from multiple
     // tables or multiple partitions.
     Path[] paths = combine.getInputPathsShim(job);
+    Map<CombinePathInputFormat, CombineFilter> poolMap =
+      new HashMap<CombinePathInputFormat, CombineFilter>();
     Set<Path> poolSet = new HashSet<Path>();
+
     for (Path path : paths) {
 
       PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
@@ -246,6 +283,7 @@ public class CombineHiveInputFormat<K ex
 
       // Use HiveInputFormat if any of the paths is not splittable
       Class inputFormatClass = part.getInputFileFormatClass();
+      String inputFormatClassName = inputFormatClass.getName();
       InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
 
       // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
@@ -288,25 +326,54 @@ public class CombineHiveInputFormat<K ex
         return super.getSplits(job, numSplits);
       }
 
+      Path filterPath = path;
+
       // In the case of tablesample, the input paths are pointing to files rather than directories.
       // We need to get the parent directory as the filtering path so that all files in the same
       // parent directory will be grouped into one pool but not files from different parent
       // directories. This guarantees that a split will combine all files in the same partition
-      // but won't cross multiple partitions.
-      Path filterPath = path;
-      if (!path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not directory
+      // but won't cross multiple partitions if the user has asked for that.
+      if (mrwork.isMapperCannotSpanPartns() &&
+          !path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not directory
         filterPath = path.getParent();
       }
-      if (!poolSet.contains(filterPath)) {
-        LOG.info("CombineHiveInputSplit creating pool for " + path +
-            "; using filter path " + filterPath);
-        combine.createPool(job, new CombineFilter(filterPath));
+
+      // Does a pool exist for this path already?
+      CombineFilter f = null;
+      List<Operator<? extends Serializable>> opList = null;
+      boolean done = false;
+
+      if (!mrwork.isMapperCannotSpanPartns()) {
+        opList = HiveFileFormatUtils.doGetAliasesFromPath(
+                   pathToAliases, aliasToWork, filterPath);
+        f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName));
+      }
+      else {
+        if (poolSet.contains(filterPath)) {
+          LOG.info("CombineHiveInputSplit: pool is already created for " + path +
+                   "; using filter path " + filterPath);
+          done = true;
+        }
         poolSet.add(filterPath);
-      } else {
-        LOG.info("CombineHiveInputSplit: pool is already created for " + path +
-            "; using filter path " + filterPath);
+      }
+
+      if (!done) {
+        if (f == null) {
+          f = new CombineFilter(filterPath);
+          LOG.info("CombineHiveInputSplit creating pool for " + path +
+                   "; using filter path " + filterPath);
+          combine.createPool(job, f);
+          if (!mrwork.isMapperCannotSpanPartns()) {
+            poolMap.put(new CombinePathInputFormat(opList, inputFormatClassName), f);
+          }
+        } else {
+          LOG.info("CombineHiveInputSplit: pool is already created for " + path +
+                   "; using filter path " + filterPath);
+          f.addPath(filterPath);
+        }
       }
     }
+
     InputSplitShim[] iss = combine.getSplits(job, 1);
     for (InputSplitShim is : iss) {
       CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
@@ -314,7 +381,6 @@ public class CombineHiveInputFormat<K ex
     }
 
     LOG.info("number of splits " + result.size());
-
     return result.toArray(new CombineHiveInputSplit[result.size()]);
   }
 
@@ -351,7 +417,7 @@ public class CombineHiveInputFormat<K ex
   }
 
   static class CombineFilter implements PathFilter {
-    private final String pString;
+    private List<String> pStrings = new ArrayList<String>();
 
     // store a path prefix in this TestFilter
     // PRECONDITION: p should always be a directory
@@ -359,21 +425,33 @@ public class CombineHiveInputFormat<K ex
       // we need to keep the path part only because the Hadoop CombineFileInputFormat will
       // pass the path part only to accept().
       // Trailing the path with a separator to prevent partial matching.
-      pString = p.toUri().getPath().toString() + File.separator;;
+      addPath(p);
+    }
+
+    public void addPath(Path p) {
+      String pString = p.toUri().getPath().toString() + File.separator;;
+      pStrings.add(pString);
     }
 
     // returns true if the specified path matches the prefix stored
     // in this TestFilter.
     public boolean accept(Path path) {
-      if (path.toString().indexOf(pString) == 0) {
-        return true;
+      for (String pString : pStrings) {
+        if (path.toString().indexOf(pString) == 0) {
+          return true;
+        }
       }
       return false;
     }
 
     @Override
     public String toString() {
-      return "PathFilter:" + pString;
+      StringBuilder s = new StringBuilder();
+      s.append("PathFilter: ");
+      for (String pString : pStrings) {
+        s.append(pString + " ");
+      }
+      return s.toString();
     }
   }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Mon Feb  7 19:58:43 2011
@@ -20,7 +20,9 @@ package org.apache.hadoop.hive.ql.io;
 
 import java.io.File;
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
@@ -31,6 +33,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -244,7 +247,7 @@ public final class HiveFileFormatUtils {
     }
     return null;
   }
-  
+
   public static PartitionDesc getPartitionDescFromPathRecursively(
       Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
       Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap)
@@ -252,7 +255,7 @@ public final class HiveFileFormatUtils {
     return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
         cacheMap, false);
   }
-  
+
   public static PartitionDesc getPartitionDescFromPathRecursively(
       Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
       Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap,
@@ -267,11 +270,11 @@ public final class HiveFileFormatUtils {
       if (cacheMap != null) {
         newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
       }
-      
+
       if (newPathToPartitionInfo == null) { // still null
         newPathToPartitionInfo = new HashMap<String, PartitionDesc>();
         populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo);
-        
+
         if (cacheMap != null) {
           cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
         }
@@ -309,7 +312,7 @@ public final class HiveFileFormatUtils {
       //      LOG.warn("exact match not found, try ripping input path's theme and authority");
       part = pathToPartitionInfo.get(dirPath);
     }
-    
+
     if (part == null) {
       String dirStr = dir.toString();
       int dirPathIndex = dirPath.lastIndexOf(File.separator);
@@ -333,6 +336,68 @@ public final class HiveFileFormatUtils {
     return part;
   }
 
+  private static boolean foundAlias(Map<String, ArrayList<String>> pathToAliases,
+                                    String path) {
+    List<String> aliases = pathToAliases.get(path);
+    if ((aliases == null) || (aliases.isEmpty())) {
+      return false;
+    }
+    return true;
+  }
+
+  private static String getMatchingPath(Map<String, ArrayList<String>> pathToAliases,
+                                        Path dir) {
+    // First find the path to be searched
+    String path = dir.toString();
+    if (foundAlias(pathToAliases, path)) {
+      return path;
+    }
+
+    String dirPath = dir.toUri().getPath();
+    if (foundAlias(pathToAliases, dirPath)) {
+      return dirPath;
+    }
+    path = dirPath;
+
+    String dirStr = dir.toString();
+    int dirPathIndex = dirPath.lastIndexOf(File.separator);
+    int dirStrIndex = dirStr.lastIndexOf(File.separator);
+    while (dirPathIndex >= 0 && dirStrIndex >= 0) {
+      dirStr = dirStr.substring(0, dirStrIndex);
+      dirPath = dirPath.substring(0, dirPathIndex);
+      //first try full match
+      if (foundAlias(pathToAliases, dirStr)) {
+        return dirStr;
+      }
+      if (foundAlias(pathToAliases, dirPath)) {
+        return dirPath;
+      }
+      dirPathIndex = dirPath.lastIndexOf(File.separator);
+      dirStrIndex = dirStr.lastIndexOf(File.separator);
+    }
+    return null;
+  }
+
+  /**
+   * Get the list of operators from the operator tree that are needed for the path
+   * @param pathToAliases  mapping from path to aliases
+   * @param aliasToWork    The operator tree to be invoked for a given alias
+   * @param dir            The path to look for
+   **/
+  public static List<Operator<? extends Serializable>> doGetAliasesFromPath(
+    Map<String, ArrayList<String>> pathToAliases,
+    Map<String, Operator<? extends Serializable>> aliasToWork, Path dir) {
+
+    String path = getMatchingPath(pathToAliases, dir);
+    List<Operator<? extends Serializable>> opList =
+      new ArrayList<Operator<? extends Serializable>>();
+    List<String> aliases = pathToAliases.get(path);
+    for (String alias : aliases) {
+      opList.add(aliasToWork.get(alias));
+    }
+    return opList;
+  }
+
   private HiveFileFormatUtils() {
     // prevent instantiation
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Mon Feb  7 19:58:43 2011
@@ -288,6 +288,9 @@ public class HiveInputFormat<K extends W
         result.add(new HiveInputSplit(is, inputFormatClass.getName()));
       }
     }
+
+    LOG.info("number of splits " + result.size());
+
     return result.toArray(new HiveInputSplit[result.size()]);
   }
 
@@ -365,7 +368,7 @@ public class HiveInputFormat<K extends W
     pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath,
       splitPathWithNoSchema, false);
   }
-  
+
   protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
       String splitPath, String splitPathWithNoSchema, boolean nonNative) {
     if (this.mrwork == null) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Mon Feb  7 19:58:43 2011
@@ -426,6 +426,7 @@ public class GenMRFileSink1 implements N
     cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
     cplan.setNumReduceTasks(0);
     cplan.getAliasToWork().put(inputDir, topOp);
+    cplan.setMapperCannotSpanPartns(true);
 
     return cplan;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Feb  7 19:58:43 2011
@@ -765,6 +765,11 @@ public final class GenMapRedUtils {
    */
   public static MapredWork getMapRedWork(HiveConf conf) {
     MapredWork work = new MapredWork();
+    // This code has only been added for testing
+    boolean mapperCannotSpanPartns =
+      conf.getBoolVar(
+        HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
+    work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
     work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
     work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
     work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java Mon Feb  7 19:58:43 2011
@@ -58,6 +58,7 @@ import org.apache.hadoop.hive.ql.plan.Ta
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 
 /**
  * GenMRSkewJoinProcessor.
@@ -232,6 +233,13 @@ public final class GenMRSkewJoinProcesso
     for (int i = 0; i < numAliases - 1; i++) {
       Byte src = tags[i];
       MapredWork newPlan = PlanUtils.getMapRedWork();
+
+      // This code has only been added for testing
+      boolean mapperCannotSpanPartns =
+        parseCtx.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
+      newPlan.setMapperCannotSpanPartns(mapperCannotSpanPartns);
+
       MapredWork clonePlan = null;
       try {
         String xmlPlan = currPlan.toXML();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Mon Feb  7 19:58:43 2011
@@ -29,6 +29,9 @@ import org.apache.hadoop.hive.ql.exec.Op
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 
 /**
  * MapredWork.
@@ -75,6 +78,8 @@ public class MapredWork implements Seria
 
   private QBJoinTree joinTree;
 
+  private boolean mapperCannotSpanPartns;
+
   public MapredWork() {
     aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
   }
@@ -339,6 +344,14 @@ public class MapredWork implements Seria
     return this.gatheringStats;
   }
 
+  public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns) {
+    this.mapperCannotSpanPartns = mapperCannotSpanPartns;
+  }
+
+  public boolean isMapperCannotSpanPartns() {
+    return this.mapperCannotSpanPartns;
+  }
+
   public String getTmpHDFSFileURI() {
     return tmpHDFSFileURI;
   }

Added: hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q Mon Feb  7 19:58:43 2011
@@ -0,0 +1,14 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+create table partition_test_partitioned(key string, value string) partitioned by (dt string);
+
+alter table partition_test_partitioned set fileformat rcfile;
+insert overwrite table partition_test_partitioned partition(dt=101) select * from src1;
+alter table partition_test_partitioned set fileformat Sequencefile;
+insert overwrite table partition_test_partitioned partition(dt=102) select * from src1;
+
+select dt, count(1) from partition_test_partitioned where dt is not null group by dt;
+
+insert overwrite table partition_test_partitioned partition(dt=103) select * from src1;
+
+select dt, count(1) from partition_test_partitioned where dt is not null group by dt;

Added: hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q Mon Feb  7 19:58:43 2011
@@ -0,0 +1,19 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+create table partition_test_partitioned(key string, value string) partitioned by (dt string);
+
+alter table partition_test_partitioned set fileformat rcfile;
+insert overwrite table partition_test_partitioned partition(dt=101) select * from src1;
+alter table partition_test_partitioned set fileformat Sequencefile;
+
+insert overwrite table partition_test_partitioned partition(dt=102) select * from src1;
+
+select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=101 and key < 20)s;
+
+select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=102 and key < 20)s;

Added: hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q Mon Feb  7 19:58:43 2011
@@ -0,0 +1,12 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+create table partition_test_partitioned(key string, value string) partitioned by (dt string);
+
+alter table partition_test_partitioned set fileformat rcfile;
+insert overwrite table partition_test_partitioned partition(dt=101) select * from src1;
+
+select count(1) from partition_test_partitioned  a join partition_test_partitioned  b on a.key = b.key
+where a.dt = '101' and b.dt = '101';
+
+select count(1) from partition_test_partitioned  a join partition_test_partitioned  b on a.key = b.key
+where a.dt = '101' and b.dt = '101' and a.key < 100;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/results/clientpositive/input42.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input42.q.out?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input42.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input42.q.out Mon Feb  7 19:58:43 2011
@@ -54,10 +54,10 @@ STAGE PLANS:
                         type: string
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
       Path -> Partition:
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
           Partition
             base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -71,13 +71,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1288389287
+              transient_lastDdlTime 1296766190
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -88,17 +88,17 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+                location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
                 name srcpart
                 partition_columns ds/hr
                 serialization.ddl struct srcpart { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1288389287
+                transient_lastDdlTime 1296766190
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
             name: srcpart
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
           Partition
             base file name: hr=12
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -112,13 +112,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1288389287
+              transient_lastDdlTime 1296766190
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -129,13 +129,13 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+                location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
                 name srcpart
                 partition_columns ds/hr
                 serialization.ddl struct srcpart { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1288389287
+                transient_lastDdlTime 1296766190
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
             name: srcpart
@@ -144,9 +144,9 @@ STAGE PLANS:
           File Output Operator
             compressed: false
             GlobalTableId: 0
-            directory: file:/tmp/sdong/hive_2010-10-29_15-31-51_423_2950637669981243052/-ext-10001
+            directory: file:/tmp/njain/hive_2011-02-03_12-49-59_467_3796150470392177289/-ext-10001
             NumFilesPerFileSink: 1
-            Stats Publishing Key Prefix: file:/tmp/sdong/hive_2010-10-29_15-31-51_423_2950637669981243052/-ext-10001/
+            Stats Publishing Key Prefix: file:/tmp/njain/hive_2011-02-03_12-49-59_467_3796150470392177289/-ext-10001/
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -167,12 +167,12 @@ PREHOOK: query: select * from srcpart a 
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-51_551_23639250440929738/-mr-10000
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-49-59_801_525167415497485863/-mr-10000
 POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' order by a.key, a.hr
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-51_551_23639250440929738/-mr-10000
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-49-59_801_525167415497485863/-mr-10000
 0	val_0	2008-04-08	11
 0	val_0	2008-04-08	11
 0	val_0	2008-04-08	11
@@ -1234,10 +1234,10 @@ STAGE PLANS:
                           type: string
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
       Path -> Partition:
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
           Partition
             base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1251,13 +1251,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1288389287
+              transient_lastDdlTime 1296766190
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1268,17 +1268,17 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+                location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
                 name srcpart
                 partition_columns ds/hr
                 serialization.ddl struct srcpart { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1288389287
+                transient_lastDdlTime 1296766190
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
             name: srcpart
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
           Partition
             base file name: hr=12
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1292,13 +1292,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1288389287
+              transient_lastDdlTime 1296766190
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1309,13 +1309,13 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+                location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
                 name srcpart
                 partition_columns ds/hr
                 serialization.ddl struct srcpart { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1288389287
+                transient_lastDdlTime 1296766190
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
             name: srcpart
@@ -1324,9 +1324,9 @@ STAGE PLANS:
           File Output Operator
             compressed: false
             GlobalTableId: 0
-            directory: file:/tmp/sdong/hive_2010-10-29_15-31-55_370_4976912681665825327/-ext-10001
+            directory: file:/tmp/njain/hive_2011-02-03_12-50-03_142_654060154054043640/-ext-10001
             NumFilesPerFileSink: 1
-            Stats Publishing Key Prefix: file:/tmp/sdong/hive_2010-10-29_15-31-55_370_4976912681665825327/-ext-10001/
+            Stats Publishing Key Prefix: file:/tmp/njain/hive_2011-02-03_12-50-03_142_654060154054043640/-ext-10001/
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1347,12 +1347,12 @@ PREHOOK: query: select * from srcpart a 
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-55_529_6657949327034269590/-mr-10000
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-03_306_2508303776661842458/-mr-10000
 POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200 order by a.key, a.hr
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-55_529_6657949327034269590/-mr-10000
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-03_306_2508303776661842458/-mr-10000
 0	val_0	2008-04-08	11
 0	val_0	2008-04-08	11
 0	val_0	2008-04-08	11
@@ -1787,10 +1787,10 @@ STAGE PLANS:
                         type: string
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
       Path -> Partition:
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
           Partition
             base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1804,13 +1804,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1288389287
+              transient_lastDdlTime 1296766190
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1821,17 +1821,17 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+                location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
                 name srcpart
                 partition_columns ds/hr
                 serialization.ddl struct srcpart { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1288389287
+                transient_lastDdlTime 1296766190
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
             name: srcpart
-        pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
           Partition
             base file name: hr=12
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1845,13 +1845,13 @@ STAGE PLANS:
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1288389287
+              transient_lastDdlTime 1296766190
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1862,13 +1862,13 @@ STAGE PLANS:
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+                location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
                 name srcpart
                 partition_columns ds/hr
                 serialization.ddl struct srcpart { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1288389287
+                transient_lastDdlTime 1296766190
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
             name: srcpart
@@ -1877,9 +1877,9 @@ STAGE PLANS:
           File Output Operator
             compressed: false
             GlobalTableId: 0
-            directory: file:/tmp/sdong/hive_2010-10-29_15-31-58_886_3715859592074339834/-ext-10001
+            directory: file:/tmp/njain/hive_2011-02-03_12-50-06_477_8143234486934165536/-ext-10001
             NumFilesPerFileSink: 1
-            Stats Publishing Key Prefix: file:/tmp/sdong/hive_2010-10-29_15-31-58_886_3715859592074339834/-ext-10001/
+            Stats Publishing Key Prefix: file:/tmp/njain/hive_2011-02-03_12-50-06_477_8143234486934165536/-ext-10001/
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1900,103 +1900,101 @@ PREHOOK: query: select * from srcpart a 
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-59_013_2075633210966279442/-mr-10000
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-06_625_1807918217728904887/-mr-10000
 POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1 order by a.key, a.hr
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-59_013_2075633210966279442/-mr-10000
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-06_625_1807918217728904887/-mr-10000
 113	val_113	2008-04-08	11
-113	val_113	2008-04-08	12
 118	val_118	2008-04-08	11
-118	val_118	2008-04-08	12
+12	val_12	2008-04-08	12
+125	val_125	2008-04-08	12
 128	val_128	2008-04-08	11
-128	val_128	2008-04-08	12
 143	val_143	2008-04-08	11
 143	val_143	2008-04-08	12
 145	val_145	2008-04-08	11
-145	val_145	2008-04-08	12
+149	val_149	2008-04-08	12
 15	val_15	2008-04-08	11
 15	val_15	2008-04-08	12
+160	val_160	2008-04-08	12
 164	val_164	2008-04-08	11
-164	val_164	2008-04-08	12
+165	val_165	2008-04-08	12
+166	val_166	2008-04-08	12
 170	val_170	2008-04-08	11
-170	val_170	2008-04-08	12
 176	val_176	2008-04-08	11
 176	val_176	2008-04-08	12
 181	val_181	2008-04-08	11
-181	val_181	2008-04-08	12
+191	val_191	2008-04-08	12
 195	val_195	2008-04-08	11
-195	val_195	2008-04-08	12
+197	val_197	2008-04-08	12
+199	val_199	2008-04-08	12
+203	val_203	2008-04-08	12
+216	val_216	2008-04-08	12
+218	val_218	2008-04-08	12
 223	val_223	2008-04-08	11
-223	val_223	2008-04-08	12
+224	val_224	2008-04-08	12
 237	val_237	2008-04-08	11
-237	val_237	2008-04-08	12
 239	val_239	2008-04-08	11
-239	val_239	2008-04-08	12
+242	val_242	2008-04-08	12
 256	val_256	2008-04-08	11
 256	val_256	2008-04-08	12
+278	val_278	2008-04-08	12
+288	val_288	2008-04-08	12
 292	val_292	2008-04-08	11
-292	val_292	2008-04-08	12
 298	val_298	2008-04-08	11
-298	val_298	2008-04-08	12
+316	val_316	2008-04-08	12
+325	val_325	2008-04-08	12
+332	val_332	2008-04-08	12
+34	val_34	2008-04-08	12
 341	val_341	2008-04-08	11
-341	val_341	2008-04-08	12
+348	val_348	2008-04-08	12
 368	val_368	2008-04-08	11
-368	val_368	2008-04-08	12
 369	val_369	2008-04-08	11
-369	val_369	2008-04-08	12
+37	val_37	2008-04-08	12
 394	val_394	2008-04-08	11
-394	val_394	2008-04-08	12
+4	val_4	2008-04-08	12
+400	val_400	2008-04-08	12
 401	val_401	2008-04-08	11
-401	val_401	2008-04-08	12
+402	val_402	2008-04-08	12
 404	val_404	2008-04-08	11
-404	val_404	2008-04-08	12
 406	val_406	2008-04-08	11
-406	val_406	2008-04-08	12
+414	val_414	2008-04-08	12
 417	val_417	2008-04-08	11
-417	val_417	2008-04-08	12
+42	val_42	2008-04-08	12
 424	val_424	2008-04-08	11
 424	val_424	2008-04-08	11
 424	val_424	2008-04-08	12
-424	val_424	2008-04-08	12
 444	val_444	2008-04-08	11
 444	val_444	2008-04-08	12
 446	val_446	2008-04-08	11
-446	val_446	2008-04-08	12
 453	val_453	2008-04-08	11
-453	val_453	2008-04-08	12
+454	val_454	2008-04-08	12
 455	val_455	2008-04-08	11
 455	val_455	2008-04-08	12
 466	val_466	2008-04-08	11
-466	val_466	2008-04-08	12
 470	val_470	2008-04-08	11
-470	val_470	2008-04-08	12
 472	val_472	2008-04-08	11
-472	val_472	2008-04-08	12
+478	val_478	2008-04-08	12
 483	val_483	2008-04-08	11
-483	val_483	2008-04-08	12
+485	val_485	2008-04-08	12
 487	val_487	2008-04-08	11
-487	val_487	2008-04-08	12
 489	val_489	2008-04-08	11
 489	val_489	2008-04-08	12
+489	val_489	2008-04-08	12
+489	val_489	2008-04-08	12
 491	val_491	2008-04-08	11
-491	val_491	2008-04-08	12
+492	val_492	2008-04-08	12
+497	val_497	2008-04-08	12
 53	val_53	2008-04-08	11
-53	val_53	2008-04-08	12
+64	val_64	2008-04-08	12
 65	val_65	2008-04-08	11
-65	val_65	2008-04-08	12
 69	val_69	2008-04-08	11
-69	val_69	2008-04-08	12
 70	val_70	2008-04-08	11
-70	val_70	2008-04-08	12
 72	val_72	2008-04-08	11
-72	val_72	2008-04-08	12
 76	val_76	2008-04-08	11
 76	val_76	2008-04-08	12
 78	val_78	2008-04-08	11
-78	val_78	2008-04-08	12
 85	val_85	2008-04-08	11
-85	val_85	2008-04-08	12
+92	val_92	2008-04-08	12
 97	val_97	2008-04-08	11
-97	val_97	2008-04-08	12

Added: hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out Mon Feb  7 19:58:43 2011
@@ -0,0 +1,96 @@
+PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Input: default@partition_test_partitioned@dt=102
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-20-57_358_902155619985638012/-mr-10000
+POSTHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Input: default@partition_test_partitioned@dt=102
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-20-57_358_902155619985638012/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+101	25
+102	25
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=103) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=103
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=103) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=103
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Input: default@partition_test_partitioned@dt=102
+PREHOOK: Input: default@partition_test_partitioned@dt=103
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-21-05_958_2383252255379009164/-mr-10000
+POSTHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Input: default@partition_test_partitioned@dt=102
+POSTHOOK: Input: default@partition_test_partitioned@dt=103
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-21-05_958_2383252255379009164/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+101	25
+102	25
+103	25

Added: hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out Mon Feb  7 19:58:43 2011
@@ -0,0 +1,85 @@
+PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=101 and key < 20)s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-45_437_4407266801936254750/-mr-10000
+POSTHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=101 and key < 20)s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-45_437_4407266801936254750/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+2
+PREHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=102 and key < 20)s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Input: default@partition_test_partitioned@dt=102
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-48_866_3762944345717931440/-mr-10000
+POSTHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=102 and key < 20)s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Input: default@partition_test_partitioned@dt=102
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-48_866_3762944345717931440/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+2

Added: hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out Mon Feb  7 19:58:43 2011
@@ -0,0 +1,49 @@
+PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(1) from partition_test_partitioned  a join partition_test_partitioned  b on a.key = b.key
+where a.dt = '101' and b.dt = '101'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-22_412_8167802428528151380/-mr-10000
+POSTHOOK: query: select count(1) from partition_test_partitioned  a join partition_test_partitioned  b on a.key = b.key
+where a.dt = '101' and b.dt = '101'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-22_412_8167802428528151380/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+115
+PREHOOK: query: select count(1) from partition_test_partitioned  a join partition_test_partitioned  b on a.key = b.key
+where a.dt = '101' and b.dt = '101' and a.key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-28_424_5032685511472669327/-mr-10000
+POSTHOOK: query: select count(1) from partition_test_partitioned  a join partition_test_partitioned  b on a.key = b.key
+where a.dt = '101' and b.dt = '101' and a.key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-28_424_5032685511472669327/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+2