You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2011/02/07 20:58:44 UTC
svn commit: r1068083 [1/5] - in /hive/trunk: ./
common/src/java/org/apache/hadoop/hive/conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/io/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/h...
Author: heyongqiang
Date: Mon Feb 7 19:58:43 2011
New Revision: 1068083
URL: http://svn.apache.org/viewvc?rev=1068083&view=rev
Log:
HIVE-1900 a mapper should be able to span multiple partitions (namit via He Yongqiang)
Added:
hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q
hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q
hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q
hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out
hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out
hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out
Modified:
hive/trunk/CHANGES.txt
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
hive/trunk/ql/src/test/results/clientpositive/input42.q.out
hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out
hive/trunk/ql/src/test/results/compiler/plan/case_sensitivity.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input1.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input2.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input3.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input6.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input7.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input9.q.xml
hive/trunk/ql/src/test/results/compiler/plan/input_testsequencefile.q.xml
hive/trunk/ql/src/test/results/compiler/plan/sample2.q.xml
hive/trunk/ql/src/test/results/compiler/plan/sample3.q.xml
hive/trunk/ql/src/test/results/compiler/plan/sample4.q.xml
hive/trunk/ql/src/test/results/compiler/plan/sample5.q.xml
hive/trunk/ql/src/test/results/compiler/plan/sample6.q.xml
hive/trunk/ql/src/test/results/compiler/plan/sample7.q.xml
hive/trunk/ql/src/test/results/compiler/plan/subq.q.xml
hive/trunk/ql/src/test/results/compiler/plan/union.q.xml
Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Mon Feb 7 19:58:43 2011
@@ -175,6 +175,9 @@ Trunk - Unreleased
HIVE-1961 Make Stats gathering more flexible with timeout and atomicity
(Ning Zhang via namit)
+ HIVE-1900 a mapper should be able to span multiple partitions
+ (namit via He Yongqiang)
+
IMPROVEMENTS
HIVE-1235 use Ivy for fetching HBase dependencies (John Sichi via cws)
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Mon Feb 7 19:58:43 2011
@@ -373,6 +373,11 @@ public class HiveConf extends Configurat
HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false),
HIVE_INDEX_IGNORE_HDFS_LOC("hive.index.compact.file.ignore.hdfs", false),
+
+ // temporary variable for testing. This is added just to turn off this feature in case of a bug in
+ // deployment. It has intentionally not been documented in hive-default.xml; this should be removed
+ // once the feature is stable
+ HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS("hive.mapper.cannot.span.multiple.partitions", false),
;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java Mon Feb 7 19:58:43 2011
@@ -107,6 +107,7 @@ public class ExecMapperContext {
}
public String getCurrentInputFile() {
+ currentInputFile = this.ioCxt.getInputFile();
return currentInputFile;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Mon Feb 7 19:58:43 2011
@@ -194,6 +194,23 @@ public class MapJoinOperator extends Abs
}
}
+ // Load the hash table
+ @Override
+ public void cleanUpInputFileChangedOp() throws HiveException {
+ try {
+ if (firstRow) {
+ // generate the map metadata
+ generateMapMetaData();
+ firstRow = false;
+ }
+
+ loadHashTable();
+ } catch (SerDeException e) {
+ e.printStackTrace();
+ throw new HiveException(e);
+ }
+ }
+
@Override
public void processOp(Object row, int tag) throws HiveException {
@@ -203,9 +220,6 @@ public class MapJoinOperator extends Abs
generateMapMetaData();
firstRow = false;
}
- if (this.getExecContext().inputFileChanged()) {
- loadHashTable();
- }
// get alias
alias = order[tag];
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java Mon Feb 7 19:58:43 2011
@@ -25,6 +25,8 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Properties;
@@ -79,6 +81,7 @@ public class MapOperator extends Operato
private transient boolean isPartitioned;
private transient boolean hasVC;
private Map<MapInputPath, MapOpCtx> opCtxMap;
+ private Set<MapInputPath> listInputPaths = new HashSet<MapInputPath>();
private Map<Operator<? extends Serializable>, java.util.ArrayList<String>> operatorToPaths;
@@ -121,6 +124,15 @@ public class MapOperator extends Operato
public int hashCode() {
return (op == null) ? 0 : op.hashCode();
}
+
+ public Operator<? extends Serializable> getOp() {
+ return op;
+ }
+
+ public void setOp(Operator<? extends Serializable> op) {
+ this.op = op;
+ }
+
}
private static class MapOpCtx {
@@ -271,10 +283,70 @@ public class MapOperator extends Operato
return opCtx;
}
+ /**
+ * Set the inspectors given an input. Since a mapper can span multiple partitions, the inspectors
+ * need to be changed if the input changes
+ **/
+ private void setInspectorInput(MapInputPath inp) {
+ Operator<? extends Serializable> op = inp.getOp();
+
+ deserializer = opCtxMap.get(inp).getDeserializer();
+ isPartitioned = opCtxMap.get(inp).isPartitioned();
+ rowWithPart = opCtxMap.get(inp).getRowWithPart();
+ rowObjectInspector = opCtxMap.get(inp).getRowObjectInspector();
+ if (listInputPaths.contains(inp)) {
+ return;
+ }
+
+ listInputPaths.add(inp);
+ StructObjectInspector rawRowObjectInspector = opCtxMap.get(inp).rawRowObjectInspector;
+ StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector;
+ if (op instanceof TableScanOperator) {
+ TableScanOperator tsOp = (TableScanOperator) op;
+ TableScanDesc tsDesc = tsOp.getConf();
+ if(tsDesc != null) {
+ this.vcs = tsDesc.getVirtualCols();
+ if (vcs != null && vcs.size() > 0) {
+ this.hasVC = true;
+ List<String> vcNames = new ArrayList<String>(vcs.size());
+ this.vcValues = new Writable[vcs.size()];
+ List<ObjectInspector> vcsObjectInspectors = new ArrayList<ObjectInspector>(vcs.size());
+ for (int i = 0; i < vcs.size(); i++) {
+ VirtualColumn vc = vcs.get(i);
+ vcsObjectInspectors.add(
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory()));
+ vcNames.add(vc.getName());
+ }
+ StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory
+ .getStandardStructObjectInspector(vcNames,
+ vcsObjectInspectors);
+ if (isPartitioned) {
+ this.rowWithPartAndVC = new Object[3];
+ this.rowWithPartAndVC[1] = this.rowWithPart[1];
+ } else {
+ this.rowWithPartAndVC = new Object[2];
+ }
+ if(partObjectInspector == null) {
+ this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
+ .asList(new StructObjectInspector[] {
+ rowObjectInspector, vcStructObjectInspector }));
+ } else {
+ this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
+ .asList(new StructObjectInspector[] {
+ rawRowObjectInspector, partObjectInspector, vcStructObjectInspector }));
+ }
+ opCtxMap.get(inp).rowObjectInspector = this.rowObjectInspector;
+ }
+ }
+ }
+ }
+
public void setChildren(Configuration hconf) throws HiveException {
Path fpath = new Path((new Path(HiveConf.getVar(hconf,
HiveConf.ConfVars.HADOOPMAPFILENAME))).toUri().getPath());
+
ArrayList<Operator<? extends Serializable>> children = new ArrayList<Operator<? extends Serializable>>();
opCtxMap = new HashMap<MapInputPath, MapOpCtx>();
operatorToPaths = new HashMap<Operator<? extends Serializable>, java.util.ArrayList<String>>();
@@ -311,51 +383,7 @@ public class MapOperator extends Operato
LOG.info("dump " + op.getName() + " "
+ opCtxMap.get(inp).getRowObjectInspector().getTypeName());
if (!done) {
- deserializer = opCtxMap.get(inp).getDeserializer();
- isPartitioned = opCtxMap.get(inp).isPartitioned();
- rowWithPart = opCtxMap.get(inp).getRowWithPart();
- rowObjectInspector = opCtxMap.get(inp).getRowObjectInspector();
- StructObjectInspector rawRowObjectInspector = opCtxMap.get(inp).rawRowObjectInspector;
- StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector;
- if (op instanceof TableScanOperator) {
- TableScanOperator tsOp = (TableScanOperator) op;
- TableScanDesc tsDesc = tsOp.getConf();
- if(tsDesc != null) {
- this.vcs = tsDesc.getVirtualCols();
- if (vcs != null && vcs.size() > 0) {
- this.hasVC = true;
- List<String> vcNames = new ArrayList<String>(vcs.size());
- this.vcValues = new Writable[vcs.size()];
- List<ObjectInspector> vcsObjectInspectors = new ArrayList<ObjectInspector>(vcs.size());
- for (int i = 0; i < vcs.size(); i++) {
- VirtualColumn vc = vcs.get(i);
- vcsObjectInspectors.add(
- PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
- ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory()));
- vcNames.add(vc.getName());
- }
- StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory
- .getStandardStructObjectInspector(vcNames,
- vcsObjectInspectors);
- if (isPartitioned) {
- this.rowWithPartAndVC = new Object[3];
- this.rowWithPartAndVC[1] = this.rowWithPart[1];
- } else {
- this.rowWithPartAndVC = new Object[2];
- }
- if(partObjectInspector == null) {
- this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
- .asList(new StructObjectInspector[] {
- rowObjectInspector, vcStructObjectInspector }));
- } else {
- this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
- .asList(new StructObjectInspector[] {
- rawRowObjectInspector, partObjectInspector, vcStructObjectInspector }));
- }
- opCtxMap.get(inp).rowObjectInspector = this.rowObjectInspector;
- }
- }
- }
+ setInspectorInput(inp);
done = true;
}
}
@@ -430,7 +458,41 @@ public class MapOperator extends Operato
}
}
+ // Change the deserializer etc. since it is a new file, and a split can span
+ // multiple files/partitions.
+ public void cleanUpInputFileChangedOp() throws HiveException {
+ Path fpath = new Path((new Path(this.getExecContext().getCurrentInputFile()))
+ .toUri().getPath());
+
+ for (String onefile : conf.getPathToAliases().keySet()) {
+ Path onepath = new Path(new Path(onefile).toUri().getPath());
+ // check for the operators who will process rows coming to this Map
+ // Operator
+ if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) {
+ String onealias = conf.getPathToAliases().get(onefile).get(0);
+ Operator<? extends Serializable> op =
+ conf.getAliasToWork().get(onealias);
+
+ LOG.info("Processing alias " + onealias + " for file " + onefile);
+
+ MapInputPath inp = new MapInputPath(onefile, onealias, op);
+ setInspectorInput(inp);
+ break;
+ }
+ }
+ }
+
public void process(Writable value) throws HiveException {
+ // A mapper can span multiple files/partitions.
+ // The deserializer and inspectors need to be reset if the input file changed
+ if ((this.getExecContext() != null) &&
+ this.getExecContext().inputFileChanged()) {
+ LOG.info("Processing path " + this.getExecContext().getCurrentInputFile());
+
+ // The child operators cleanup if input file has changed
+ cleanUpInputFileChanged();
+ }
+
Object row = null;
try {
if (this.hasVC) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Mon Feb 7 19:58:43 2011
@@ -1236,4 +1236,22 @@ public abstract class Operator<T extends
}
}
}
+
+ // The input file has changed - every operator can invoke specific action
+ // for each input file
+ public void cleanUpInputFileChanged() throws HiveException {
+ this.cleanUpInputFileChangedOp();
+ if(this.childOperators != null) {
+ for (int i = 0; i<this.childOperators.size();i++) {
+ Operator<? extends Serializable> op = this.childOperators.get(i);
+ op.cleanUpInputFileChanged();
+ }
+ }
+ }
+
+ // If an operator needs to invoke specific cleanup, that operator can override
+ // this method
+ public void cleanUpInputFileChangedOp() throws HiveException {
+ }
+
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Mon Feb 7 19:58:43 2011
@@ -67,6 +67,7 @@ public class SMBMapJoinOperator extends
private transient boolean[] fetchOpDone;
private transient boolean[] foundNextKeyGroup;
transient boolean firstFetchHappened = false;
+ private transient boolean inputFileChanged = false;
transient boolean localWorkInited = false;
public SMBMapJoinOperator() {
@@ -85,6 +86,7 @@ public class SMBMapJoinOperator extends
closeCalled = false;
this.firstFetchHappened = false;
+ this.inputFileChanged = false;
// get the largest table alias from order
int maxAlias = 0;
@@ -177,11 +179,17 @@ public class SMBMapJoinOperator extends
}
}
+ // The input file has changed - load the correct hash bucket
+ @Override
+ public void cleanUpInputFileChangedOp() throws HiveException {
+ inputFileChanged = true;
+ }
+
@Override
public void processOp(Object row, int tag) throws HiveException {
if (tag == posBigTable) {
- if (this.getExecContext().inputFileChanged()) {
+ if (inputFileChanged) {
if (firstFetchHappened) {
// we need to first join and flush out data left by the previous file.
joinFinalLeftData();
@@ -194,6 +202,7 @@ public class SMBMapJoinOperator extends
setUpFetchOpContext(fetchOp, alias);
}
firstFetchHappened = false;
+ inputFileChanged = false;
}
}
@@ -531,8 +540,7 @@ public class SMBMapJoinOperator extends
}
closeCalled = true;
- if ((this.getExecContext() != null && this.getExecContext().inputFileChanged())
- || !firstFetchHappened) {
+ if (inputFileChanged || !firstFetchHappened) {
//set up the fetch operator for the new input file.
for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
String alias = entry.getKey();
@@ -546,6 +554,7 @@ public class SMBMapJoinOperator extends
fetchNextGroup(t);
}
}
+ inputFileChanged = false;
}
joinFinalLeftData();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Mon Feb 7 19:58:43 2011
@@ -51,6 +51,7 @@ public class TableScanOperator extends O
private transient Configuration hconf;
private transient Stat stat;
private transient String partitionSpecs;
+ private transient boolean inputFileChanged = false;
private TableDesc tableDesc;
@@ -77,8 +78,16 @@ public class TableScanOperator extends O
forward(row, inputObjInspectors[tag]);
}
+ // Change the table partition for collecting stats
+ @Override
+ public void cleanUpInputFileChangedOp() throws HiveException {
+ inputFileChanged = true;
+ }
+
private void gatherStats(Object row) {
- if (stat == null) { // first row/call
+ // first row/call or a new partition
+ if ((stat == null) || inputFileChanged) {
+ inputFileChanged = false;
stat = new Stat();
if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) {
partitionSpecs = "";
@@ -122,6 +131,8 @@ public class TableScanOperator extends O
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
initializeChildren(hconf);
+ inputFileChanged = false;
+
if (conf == null) {
return;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Mon Feb 7 19:58:43 2011
@@ -22,9 +22,12 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
+import java.util.List;
+import java.util.HashMap;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
@@ -50,9 +53,7 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
-
-
-
+import org.apache.hadoop.hive.ql.exec.Operator;
/**
* CombineHiveInputFormat is a parameterized InputFormat which looks at the path
@@ -211,7 +212,37 @@ public class CombineHiveInputFormat<K ex
out.writeUTF(inputFormatClassName);
}
}
-
+
+ // Splits are not shared across different partitions with different input formats.
+ // For example, 2 partitions (1 sequencefile and 1 rcfile) will have 2 different splits
+ private static class CombinePathInputFormat {
+ private List<Operator<? extends Serializable>> opList;
+ private String inputFormatClassName;
+
+ public CombinePathInputFormat(List<Operator<? extends Serializable>> opList,
+ String inputFormatClassName) {
+ this.opList = opList;
+ this.inputFormatClassName = inputFormatClassName;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof CombinePathInputFormat) {
+ CombinePathInputFormat mObj = (CombinePathInputFormat)o;
+ if (mObj == null) {
+ return false;
+ }
+ return opList.equals(mObj.opList) &&
+ inputFormatClassName.equals(mObj.inputFormatClassName);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return (opList == null) ? 0 : opList.hashCode();
+ }
+ }
+
/**
* Create Hive splits based on CombineFileSplit.
*/
@@ -219,6 +250,9 @@ public class CombineHiveInputFormat<K ex
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
init(job);
+ Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+ Map<String, Operator<? extends Serializable>> aliasToWork =
+ mrwork.getAliasToWork();
CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
.getCombineFileInputFormat();
@@ -234,7 +268,10 @@ public class CombineHiveInputFormat<K ex
// combine splits only from same tables and same partitions. Do not combine splits from multiple
// tables or multiple partitions.
Path[] paths = combine.getInputPathsShim(job);
+ Map<CombinePathInputFormat, CombineFilter> poolMap =
+ new HashMap<CombinePathInputFormat, CombineFilter>();
Set<Path> poolSet = new HashSet<Path>();
+
for (Path path : paths) {
PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
@@ -246,6 +283,7 @@ public class CombineHiveInputFormat<K ex
// Use HiveInputFormat if any of the paths is not splittable
Class inputFormatClass = part.getInputFileFormatClass();
+ String inputFormatClassName = inputFormatClass.getName();
InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
// Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
@@ -288,25 +326,54 @@ public class CombineHiveInputFormat<K ex
return super.getSplits(job, numSplits);
}
+ Path filterPath = path;
+
// In the case of tablesample, the input paths are pointing to files rather than directories.
// We need to get the parent directory as the filtering path so that all files in the same
// parent directory will be grouped into one pool but not files from different parent
// directories. This guarantees that a split will combine all files in the same partition
- // but won't cross multiple partitions.
- Path filterPath = path;
- if (!path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not directory
+ // but won't cross multiple partitions if the user has asked so.
+ if (mrwork.isMapperCannotSpanPartns() &&
+ !path.getFileSystem(job).getFileStatus(path).isDir()) { // path is not directory
filterPath = path.getParent();
}
- if (!poolSet.contains(filterPath)) {
- LOG.info("CombineHiveInputSplit creating pool for " + path +
- "; using filter path " + filterPath);
- combine.createPool(job, new CombineFilter(filterPath));
+
+ // Does a pool exist for this path already?
+ CombineFilter f = null;
+ List<Operator<? extends Serializable>> opList = null;
+ boolean done = false;
+
+ if (!mrwork.isMapperCannotSpanPartns()) {
+ opList = HiveFileFormatUtils.doGetAliasesFromPath(
+ pathToAliases, aliasToWork, filterPath);
+ f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName));
+ }
+ else {
+ if (poolSet.contains(filterPath)) {
+ LOG.info("CombineHiveInputSplit: pool is already created for " + path +
+ "; using filter path " + filterPath);
+ done = true;
+ }
poolSet.add(filterPath);
- } else {
- LOG.info("CombineHiveInputSplit: pool is already created for " + path +
- "; using filter path " + filterPath);
+ }
+
+ if (!done) {
+ if (f == null) {
+ f = new CombineFilter(filterPath);
+ LOG.info("CombineHiveInputSplit creating pool for " + path +
+ "; using filter path " + filterPath);
+ combine.createPool(job, f);
+ if (!mrwork.isMapperCannotSpanPartns()) {
+ poolMap.put(new CombinePathInputFormat(opList, inputFormatClassName), f);
+ }
+ } else {
+ LOG.info("CombineHiveInputSplit: pool is already created for " + path +
+ "; using filter path " + filterPath);
+ f.addPath(filterPath);
+ }
}
}
+
InputSplitShim[] iss = combine.getSplits(job, 1);
for (InputSplitShim is : iss) {
CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
@@ -314,7 +381,6 @@ public class CombineHiveInputFormat<K ex
}
LOG.info("number of splits " + result.size());
-
return result.toArray(new CombineHiveInputSplit[result.size()]);
}
@@ -351,7 +417,7 @@ public class CombineHiveInputFormat<K ex
}
static class CombineFilter implements PathFilter {
- private final String pString;
+ private List<String> pStrings = new ArrayList<String>();
// store a path prefix in this TestFilter
// PRECONDITION: p should always be a directory
@@ -359,21 +425,33 @@ public class CombineHiveInputFormat<K ex
// we need to keep the path part only because the Hadoop CombineFileInputFormat will
// pass the path part only to accept().
// Trailing the path with a separator to prevent partial matching.
- pString = p.toUri().getPath().toString() + File.separator;;
+ addPath(p);
+ }
+
+ public void addPath(Path p) {
+ String pString = p.toUri().getPath().toString() + File.separator;;
+ pStrings.add(pString);
}
// returns true if the specified path matches the prefix stored
// in this TestFilter.
public boolean accept(Path path) {
- if (path.toString().indexOf(pString) == 0) {
- return true;
+ for (String pString : pStrings) {
+ if (path.toString().indexOf(pString) == 0) {
+ return true;
+ }
}
return false;
}
@Override
public String toString() {
- return "PathFilter:" + pString;
+ StringBuilder s = new StringBuilder();
+ s.append("PathFilter: ");
+ for (String pString : pStrings) {
+ s.append(pString + " ");
+ }
+ return s.toString();
}
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Mon Feb 7 19:58:43 2011
@@ -20,7 +20,9 @@ package org.apache.hadoop.hive.ql.io;
import java.io.File;
import java.io.IOException;
+import java.io.Serializable;
import java.util.ArrayList;
+import java.util.List;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
@@ -31,6 +33,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -244,7 +247,7 @@ public final class HiveFileFormatUtils {
}
return null;
}
-
+
public static PartitionDesc getPartitionDescFromPathRecursively(
Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap)
@@ -252,7 +255,7 @@ public final class HiveFileFormatUtils {
return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
cacheMap, false);
}
-
+
public static PartitionDesc getPartitionDescFromPathRecursively(
Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap,
@@ -267,11 +270,11 @@ public final class HiveFileFormatUtils {
if (cacheMap != null) {
newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
}
-
+
if (newPathToPartitionInfo == null) { // still null
newPathToPartitionInfo = new HashMap<String, PartitionDesc>();
populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo);
-
+
if (cacheMap != null) {
cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
}
@@ -309,7 +312,7 @@ public final class HiveFileFormatUtils {
// LOG.warn("exact match not found, try ripping input path's theme and authority");
part = pathToPartitionInfo.get(dirPath);
}
-
+
if (part == null) {
String dirStr = dir.toString();
int dirPathIndex = dirPath.lastIndexOf(File.separator);
@@ -333,6 +336,68 @@ public final class HiveFileFormatUtils {
return part;
}
+ private static boolean foundAlias(Map<String, ArrayList<String>> pathToAliases,
+ String path) {
+ List<String> aliases = pathToAliases.get(path);
+ if ((aliases == null) || (aliases.isEmpty())) {
+ return false;
+ }
+ return true;
+ }
+
+ private static String getMatchingPath(Map<String, ArrayList<String>> pathToAliases,
+ Path dir) {
+ // First find the path to be searched
+ String path = dir.toString();
+ if (foundAlias(pathToAliases, path)) {
+ return path;
+ }
+
+ String dirPath = dir.toUri().getPath();
+ if (foundAlias(pathToAliases, dirPath)) {
+ return dirPath;
+ }
+ path = dirPath;
+
+ String dirStr = dir.toString();
+ int dirPathIndex = dirPath.lastIndexOf(File.separator);
+ int dirStrIndex = dirStr.lastIndexOf(File.separator);
+ while (dirPathIndex >= 0 && dirStrIndex >= 0) {
+ dirStr = dirStr.substring(0, dirStrIndex);
+ dirPath = dirPath.substring(0, dirPathIndex);
+ //first try full match
+ if (foundAlias(pathToAliases, dirStr)) {
+ return dirStr;
+ }
+ if (foundAlias(pathToAliases, dirPath)) {
+ return dirPath;
+ }
+ dirPathIndex = dirPath.lastIndexOf(File.separator);
+ dirStrIndex = dirStr.lastIndexOf(File.separator);
+ }
+ return null;
+ }
+
+ /**
+ * Get the list of operators from the operator tree that are needed for the path
+ * @param pathToAliases mapping from path to aliases
+ * @param aliasToWork The operator tree to be invoked for a given alias
+ * @param dir The path to look for
+ **/
+ public static List<Operator<? extends Serializable>> doGetAliasesFromPath(
+ Map<String, ArrayList<String>> pathToAliases,
+ Map<String, Operator<? extends Serializable>> aliasToWork, Path dir) {
+
+ String path = getMatchingPath(pathToAliases, dir);
+ List<Operator<? extends Serializable>> opList =
+ new ArrayList<Operator<? extends Serializable>>();
+ List<String> aliases = pathToAliases.get(path);
+ for (String alias : aliases) {
+ opList.add(aliasToWork.get(alias));
+ }
+ return opList;
+ }
+
private HiveFileFormatUtils() {
// prevent instantiation
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Mon Feb 7 19:58:43 2011
@@ -288,6 +288,9 @@ public class HiveInputFormat<K extends W
result.add(new HiveInputSplit(is, inputFormatClass.getName()));
}
}
+
+ LOG.info("number of splits " + result.size());
+
return result.toArray(new HiveInputSplit[result.size()]);
}
@@ -365,7 +368,7 @@ public class HiveInputFormat<K extends W
pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath,
splitPathWithNoSchema, false);
}
-
+
protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
String splitPath, String splitPathWithNoSchema, boolean nonNative) {
if (this.mrwork == null) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Mon Feb 7 19:58:43 2011
@@ -426,6 +426,7 @@ public class GenMRFileSink1 implements N
cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
cplan.setNumReduceTasks(0);
cplan.getAliasToWork().put(inputDir, topOp);
+ cplan.setMapperCannotSpanPartns(true);
return cplan;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Feb 7 19:58:43 2011
@@ -765,6 +765,11 @@ public final class GenMapRedUtils {
*/
public static MapredWork getMapRedWork(HiveConf conf) {
MapredWork work = new MapredWork();
+ // This code has been added only for testing
+ boolean mapperCannotSpanPartns =
+ conf.getBoolVar(
+ HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
+ work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java Mon Feb 7 19:58:43 2011
@@ -58,6 +58,7 @@ import org.apache.hadoop.hive.ql.plan.Ta
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
/**
* GenMRSkewJoinProcessor.
@@ -232,6 +233,13 @@ public final class GenMRSkewJoinProcesso
for (int i = 0; i < numAliases - 1; i++) {
Byte src = tags[i];
MapredWork newPlan = PlanUtils.getMapRedWork();
+
+ // This code has been added only for testing
+ boolean mapperCannotSpanPartns =
+ parseCtx.getConf().getBoolVar(
+ HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
+ newPlan.setMapperCannotSpanPartns(mapperCannotSpanPartns);
+
MapredWork clonePlan = null;
try {
String xmlPlan = currPlan.toXML();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Mon Feb 7 19:58:43 2011
@@ -29,6 +29,9 @@ import org.apache.hadoop.hive.ql.exec.Op
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
/**
* MapredWork.
@@ -75,6 +78,8 @@ public class MapredWork implements Seria
private QBJoinTree joinTree;
+ private boolean mapperCannotSpanPartns;
+
public MapredWork() {
aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
}
@@ -339,6 +344,14 @@ public class MapredWork implements Seria
return this.gatheringStats;
}
+ public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns) {
+ this.mapperCannotSpanPartns = mapperCannotSpanPartns;
+ }
+
+ public boolean isMapperCannotSpanPartns() {
+ return this.mapperCannotSpanPartns;
+ }
+
public String getTmpHDFSFileURI() {
return tmpHDFSFileURI;
}
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat5.q Mon Feb 7 19:58:43 2011
@@ -0,0 +1,14 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+create table partition_test_partitioned(key string, value string) partitioned by (dt string);
+
+alter table partition_test_partitioned set fileformat rcfile;
+insert overwrite table partition_test_partitioned partition(dt=101) select * from src1;
+alter table partition_test_partitioned set fileformat Sequencefile;
+insert overwrite table partition_test_partitioned partition(dt=102) select * from src1;
+
+select dt, count(1) from partition_test_partitioned where dt is not null group by dt;
+
+insert overwrite table partition_test_partitioned partition(dt=103) select * from src1;
+
+select dt, count(1) from partition_test_partitioned where dt is not null group by dt;
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat6.q Mon Feb 7 19:58:43 2011
@@ -0,0 +1,19 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+create table partition_test_partitioned(key string, value string) partitioned by (dt string);
+
+alter table partition_test_partitioned set fileformat rcfile;
+insert overwrite table partition_test_partitioned partition(dt=101) select * from src1;
+alter table partition_test_partitioned set fileformat Sequencefile;
+
+insert overwrite table partition_test_partitioned partition(dt=102) select * from src1;
+
+select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=101 and key < 20)s;
+
+select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=102 and key < 20)s;
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_wise_fileformat7.q Mon Feb 7 19:58:43 2011
@@ -0,0 +1,12 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+create table partition_test_partitioned(key string, value string) partitioned by (dt string);
+
+alter table partition_test_partitioned set fileformat rcfile;
+insert overwrite table partition_test_partitioned partition(dt=101) select * from src1;
+
+select count(1) from partition_test_partitioned a join partition_test_partitioned b on a.key = b.key
+where a.dt = '101' and b.dt = '101';
+
+select count(1) from partition_test_partitioned a join partition_test_partitioned b on a.key = b.key
+where a.dt = '101' and b.dt = '101' and a.key < 100;
\ No newline at end of file
Modified: hive/trunk/ql/src/test/results/clientpositive/input42.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input42.q.out?rev=1068083&r1=1068082&r2=1068083&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input42.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input42.q.out Mon Feb 7 19:58:43 2011
@@ -54,10 +54,10 @@ STAGE PLANS:
type: string
Needs Tagging: false
Path -> Alias:
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
Path -> Partition:
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -71,13 +71,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -88,17 +88,17 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
name: srcpart
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -112,13 +112,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -129,13 +129,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
name: srcpart
@@ -144,9 +144,9 @@ STAGE PLANS:
File Output Operator
compressed: false
GlobalTableId: 0
- directory: file:/tmp/sdong/hive_2010-10-29_15-31-51_423_2950637669981243052/-ext-10001
+ directory: file:/tmp/njain/hive_2011-02-03_12-49-59_467_3796150470392177289/-ext-10001
NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: file:/tmp/sdong/hive_2010-10-29_15-31-51_423_2950637669981243052/-ext-10001/
+ Stats Publishing Key Prefix: file:/tmp/njain/hive_2011-02-03_12-49-59_467_3796150470392177289/-ext-10001/
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -167,12 +167,12 @@ PREHOOK: query: select * from srcpart a
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-51_551_23639250440929738/-mr-10000
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-49-59_801_525167415497485863/-mr-10000
POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' order by a.key, a.hr
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-51_551_23639250440929738/-mr-10000
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-49-59_801_525167415497485863/-mr-10000
0 val_0 2008-04-08 11
0 val_0 2008-04-08 11
0 val_0 2008-04-08 11
@@ -1234,10 +1234,10 @@ STAGE PLANS:
type: string
Needs Tagging: false
Path -> Alias:
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
Path -> Partition:
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1251,13 +1251,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1268,17 +1268,17 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
name: srcpart
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1292,13 +1292,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1309,13 +1309,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
name: srcpart
@@ -1324,9 +1324,9 @@ STAGE PLANS:
File Output Operator
compressed: false
GlobalTableId: 0
- directory: file:/tmp/sdong/hive_2010-10-29_15-31-55_370_4976912681665825327/-ext-10001
+ directory: file:/tmp/njain/hive_2011-02-03_12-50-03_142_654060154054043640/-ext-10001
NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: file:/tmp/sdong/hive_2010-10-29_15-31-55_370_4976912681665825327/-ext-10001/
+ Stats Publishing Key Prefix: file:/tmp/njain/hive_2011-02-03_12-50-03_142_654060154054043640/-ext-10001/
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1347,12 +1347,12 @@ PREHOOK: query: select * from srcpart a
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-55_529_6657949327034269590/-mr-10000
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-03_306_2508303776661842458/-mr-10000
POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200 order by a.key, a.hr
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-55_529_6657949327034269590/-mr-10000
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-03_306_2508303776661842458/-mr-10000
0 val_0 2008-04-08 11
0 val_0 2008-04-08 11
0 val_0 2008-04-08 11
@@ -1787,10 +1787,10 @@ STAGE PLANS:
type: string
Needs Tagging: false
Path -> Alias:
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [a]
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [a]
Path -> Partition:
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1804,13 +1804,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1821,17 +1821,17 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
name: srcpart
- pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1845,13 +1845,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1862,13 +1862,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/njain/hive3/build/ql/test/data/warehouse/srcpart
name srcpart
partition_columns ds/hr
serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1288389287
+ transient_lastDdlTime 1296766190
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
name: srcpart
@@ -1877,9 +1877,9 @@ STAGE PLANS:
File Output Operator
compressed: false
GlobalTableId: 0
- directory: file:/tmp/sdong/hive_2010-10-29_15-31-58_886_3715859592074339834/-ext-10001
+ directory: file:/tmp/njain/hive_2011-02-03_12-50-06_477_8143234486934165536/-ext-10001
NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: file:/tmp/sdong/hive_2010-10-29_15-31-58_886_3715859592074339834/-ext-10001/
+ Stats Publishing Key Prefix: file:/tmp/njain/hive_2011-02-03_12-50-06_477_8143234486934165536/-ext-10001/
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1900,103 +1900,101 @@ PREHOOK: query: select * from srcpart a
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-59_013_2075633210966279442/-mr-10000
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-06_625_1807918217728904887/-mr-10000
POSTHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1 order by a.key, a.hr
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_15-31-59_013_2075633210966279442/-mr-10000
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-03_12-50-06_625_1807918217728904887/-mr-10000
113 val_113 2008-04-08 11
-113 val_113 2008-04-08 12
118 val_118 2008-04-08 11
-118 val_118 2008-04-08 12
+12 val_12 2008-04-08 12
+125 val_125 2008-04-08 12
128 val_128 2008-04-08 11
-128 val_128 2008-04-08 12
143 val_143 2008-04-08 11
143 val_143 2008-04-08 12
145 val_145 2008-04-08 11
-145 val_145 2008-04-08 12
+149 val_149 2008-04-08 12
15 val_15 2008-04-08 11
15 val_15 2008-04-08 12
+160 val_160 2008-04-08 12
164 val_164 2008-04-08 11
-164 val_164 2008-04-08 12
+165 val_165 2008-04-08 12
+166 val_166 2008-04-08 12
170 val_170 2008-04-08 11
-170 val_170 2008-04-08 12
176 val_176 2008-04-08 11
176 val_176 2008-04-08 12
181 val_181 2008-04-08 11
-181 val_181 2008-04-08 12
+191 val_191 2008-04-08 12
195 val_195 2008-04-08 11
-195 val_195 2008-04-08 12
+197 val_197 2008-04-08 12
+199 val_199 2008-04-08 12
+203 val_203 2008-04-08 12
+216 val_216 2008-04-08 12
+218 val_218 2008-04-08 12
223 val_223 2008-04-08 11
-223 val_223 2008-04-08 12
+224 val_224 2008-04-08 12
237 val_237 2008-04-08 11
-237 val_237 2008-04-08 12
239 val_239 2008-04-08 11
-239 val_239 2008-04-08 12
+242 val_242 2008-04-08 12
256 val_256 2008-04-08 11
256 val_256 2008-04-08 12
+278 val_278 2008-04-08 12
+288 val_288 2008-04-08 12
292 val_292 2008-04-08 11
-292 val_292 2008-04-08 12
298 val_298 2008-04-08 11
-298 val_298 2008-04-08 12
+316 val_316 2008-04-08 12
+325 val_325 2008-04-08 12
+332 val_332 2008-04-08 12
+34 val_34 2008-04-08 12
341 val_341 2008-04-08 11
-341 val_341 2008-04-08 12
+348 val_348 2008-04-08 12
368 val_368 2008-04-08 11
-368 val_368 2008-04-08 12
369 val_369 2008-04-08 11
-369 val_369 2008-04-08 12
+37 val_37 2008-04-08 12
394 val_394 2008-04-08 11
-394 val_394 2008-04-08 12
+4 val_4 2008-04-08 12
+400 val_400 2008-04-08 12
401 val_401 2008-04-08 11
-401 val_401 2008-04-08 12
+402 val_402 2008-04-08 12
404 val_404 2008-04-08 11
-404 val_404 2008-04-08 12
406 val_406 2008-04-08 11
-406 val_406 2008-04-08 12
+414 val_414 2008-04-08 12
417 val_417 2008-04-08 11
-417 val_417 2008-04-08 12
+42 val_42 2008-04-08 12
424 val_424 2008-04-08 11
424 val_424 2008-04-08 11
424 val_424 2008-04-08 12
-424 val_424 2008-04-08 12
444 val_444 2008-04-08 11
444 val_444 2008-04-08 12
446 val_446 2008-04-08 11
-446 val_446 2008-04-08 12
453 val_453 2008-04-08 11
-453 val_453 2008-04-08 12
+454 val_454 2008-04-08 12
455 val_455 2008-04-08 11
455 val_455 2008-04-08 12
466 val_466 2008-04-08 11
-466 val_466 2008-04-08 12
470 val_470 2008-04-08 11
-470 val_470 2008-04-08 12
472 val_472 2008-04-08 11
-472 val_472 2008-04-08 12
+478 val_478 2008-04-08 12
483 val_483 2008-04-08 11
-483 val_483 2008-04-08 12
+485 val_485 2008-04-08 12
487 val_487 2008-04-08 11
-487 val_487 2008-04-08 12
489 val_489 2008-04-08 11
489 val_489 2008-04-08 12
+489 val_489 2008-04-08 12
+489 val_489 2008-04-08 12
491 val_491 2008-04-08 11
-491 val_491 2008-04-08 12
+492 val_492 2008-04-08 12
+497 val_497 2008-04-08 12
53 val_53 2008-04-08 11
-53 val_53 2008-04-08 12
+64 val_64 2008-04-08 12
65 val_65 2008-04-08 11
-65 val_65 2008-04-08 12
69 val_69 2008-04-08 11
-69 val_69 2008-04-08 12
70 val_70 2008-04-08 11
-70 val_70 2008-04-08 12
72 val_72 2008-04-08 11
-72 val_72 2008-04-08 12
76 val_76 2008-04-08 11
76 val_76 2008-04-08 12
78 val_78 2008-04-08 11
-78 val_78 2008-04-08 12
85 val_85 2008-04-08 11
-85 val_85 2008-04-08 12
+92 val_92 2008-04-08 12
97 val_97 2008-04-08 11
-97 val_97 2008-04-08 12
Added: hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat5.q.out Mon Feb 7 19:58:43 2011
@@ -0,0 +1,96 @@
+PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Input: default@partition_test_partitioned@dt=102
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-20-57_358_902155619985638012/-mr-10000
+POSTHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Input: default@partition_test_partitioned@dt=102
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-20-57_358_902155619985638012/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+101 25
+102 25
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=103) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=103
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=103) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=103
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Input: default@partition_test_partitioned@dt=102
+PREHOOK: Input: default@partition_test_partitioned@dt=103
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-21-05_958_2383252255379009164/-mr-10000
+POSTHOOK: query: select dt, count(1) from partition_test_partitioned where dt is not null group by dt
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Input: default@partition_test_partitioned@dt=102
+POSTHOOK: Input: default@partition_test_partitioned@dt=103
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-21-05_958_2383252255379009164/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=103).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+101 25
+102 25
+103 25
Added: hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat6.q.out Mon Feb 7 19:58:43 2011
@@ -0,0 +1,85 @@
+PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat Sequencefile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=102) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=102
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=101 and key < 20)s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-45_437_4407266801936254750/-mr-10000
+POSTHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=101 and key < 20)s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-45_437_4407266801936254750/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+2
+PREHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=102 and key < 20)s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Input: default@partition_test_partitioned@dt=102
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-48_866_3762944345717931440/-mr-10000
+POSTHOOK: query: select count(1) from
+(select key, value from partition_test_partitioned where dt=101 and key < 100
+ union all
+select key, value from partition_test_partitioned where dt=102 and key < 20)s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Input: default@partition_test_partitioned@dt=102
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-34-48_866_3762944345717931440/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=102).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+2
Added: hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out?rev=1068083&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_wise_fileformat7.q.out Mon Feb 7 19:58:43 2011
@@ -0,0 +1,49 @@
+PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat rcfile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt=101) select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@partition_test_partitioned@dt=101
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select count(1) from partition_test_partitioned a join partition_test_partitioned b on a.key = b.key
+where a.dt = '101' and b.dt = '101'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-22_412_8167802428528151380/-mr-10000
+POSTHOOK: query: select count(1) from partition_test_partitioned a join partition_test_partitioned b on a.key = b.key
+where a.dt = '101' and b.dt = '101'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-22_412_8167802428528151380/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+115
+PREHOOK: query: select count(1) from partition_test_partitioned a join partition_test_partitioned b on a.key = b.key
+where a.dt = '101' and b.dt = '101' and a.key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=101
+PREHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-28_424_5032685511472669327/-mr-10000
+POSTHOOK: query: select count(1) from partition_test_partitioned a join partition_test_partitioned b on a.key = b.key
+where a.dt = '101' and b.dt = '101' and a.key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=101
+POSTHOOK: Output: file:/tmp/njain/hive_2011-02-02_15-45-28_424_5032685511472669327/-mr-10000
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=101).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+2