Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/23 22:41:01 UTC
svn commit: r1525692 [2/8] - in /hive/branches/vectorization: ./
common/src/java/org/apache/hadoop/hive/conf/ conf/
contrib/src/test/results/clientpositive/
hbase-handler/src/java/org/apache/hadoop/hive/hbase/
hbase-handler/src/test/results/positive/ h...
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Mon Sep 23 20:40:54 2013
@@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.udf.Gen
import org.apache.hadoop.hive.ql.udf.GenericUDFEncode;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.UDAFPercentile;
-import org.apache.hadoop.hive.ql.udf.UDFAbs;
import org.apache.hadoop.hive.ql.udf.UDFAcos;
import org.apache.hadoop.hive.ql.udf.UDFAscii;
import org.apache.hadoop.hive.ql.udf.UDFAsin;
@@ -211,7 +210,7 @@ public final class FunctionRegistry {
registerUDF("ceil", UDFCeil.class, false);
registerUDF("ceiling", UDFCeil.class, false);
registerUDF("rand", UDFRand.class, false);
- registerUDF("abs", UDFAbs.class, false);
+ registerGenericUDF("abs", GenericUDFAbs.class);
registerUDF("pmod", UDFPosMod.class, false);
registerUDF("ln", UDFLn.class, false);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Mon Sep 23 20:40:54 2013
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -96,9 +97,10 @@ public class ReduceSinkOperator extends
protected void initializeOp(Configuration hconf) throws HiveException {
try {
- keyEval = new ExprNodeEvaluator[conf.getKeyCols().size()];
+ List<ExprNodeDesc> keys = conf.getKeyCols();
+ keyEval = new ExprNodeEvaluator[keys.size()];
int i = 0;
- for (ExprNodeDesc e : conf.getKeyCols()) {
+ for (ExprNodeDesc e : keys) {
keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
}
@@ -115,7 +117,8 @@ public class ReduceSinkOperator extends
partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
i = 0;
for (ExprNodeDesc e : conf.getPartitionCols()) {
- partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
+ int index = ExprNodeDescUtils.indexOf(e, keys);
+ partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e): keyEval[index];
}
tag = conf.getTag();
@@ -277,27 +280,27 @@ public class ReduceSinkOperator extends
}
// Evaluate the keys
- Object[] distributionKeys = new Object[numDistributionKeys];
for (int i = 0; i < numDistributionKeys; i++) {
- distributionKeys[i] = keyEval[i].evaluate(row);
+ cachedKeys[0][i] = keyEval[i].evaluate(row);
}
-
if (numDistinctExprs > 0) {
// with distinct key(s)
for (int i = 0; i < numDistinctExprs; i++) {
- System.arraycopy(distributionKeys, 0, cachedKeys[i], 0, numDistributionKeys);
- Object[] distinctParameters =
- new Object[distinctColIndices.get(i).size()];
+ if (i > 0) {
+ System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
+ }
+ StandardUnion union = (StandardUnion) cachedKeys[i][numDistributionKeys];
+ if (union == null) {
+ cachedKeys[i][numDistributionKeys] =
+ union = new StandardUnion((byte)i, new Object[distinctColIndices.get(i).size()]);
+ }
+ Object[] distinctParameters = (Object[]) union.getObject();
for (int j = 0; j < distinctParameters.length; j++) {
distinctParameters[j] =
keyEval[distinctColIndices.get(i).get(j)].evaluate(row);
}
- cachedKeys[i][numDistributionKeys] =
- new StandardUnion((byte)i, distinctParameters);
+ union.setTag((byte) i);
}
- } else {
- // no distinct key
- System.arraycopy(distributionKeys, 0, cachedKeys[0], 0, numDistributionKeys);
}
BytesWritable value = null;
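The ReduceSinkOperator hunk above stops allocating a fresh distributionKeys array and a new StandardUnion for every row: distribution keys are evaluated straight into cachedKeys[0], and the union wrapper at the end of each cached row is created lazily and then recycled, with only its tag and payload refreshed per row. A rough sketch of that reuse pattern, using hypothetical stand-in types rather than the real Hive classes (and assuming at least one distinct expression):

    // Minimal sketch of the buffer-reuse idea (hypothetical names, not Hive classes).
    final class CachedKeysSketch {
      interface Evaluator { Object evaluate(Object[] row); }

      // Stand-in for StandardUnion: a mutable (tag, payload) pair.
      static final class Union {
        byte tag;
        final Object[] payload;
        Union(byte tag, Object[] payload) { this.tag = tag; this.payload = payload; }
      }

      private final Object[][] cachedKeys;           // one row per distinct expression
      private final int numDistributionKeys;
      private final int[][] distinctColIndices;
      private final Evaluator[] keyEval;

      CachedKeysSketch(int numDistributionKeys, int[][] distinctColIndices, Evaluator[] keyEval) {
        this.numDistributionKeys = numDistributionKeys;
        this.distinctColIndices = distinctColIndices;
        this.keyEval = keyEval;
        this.cachedKeys = new Object[distinctColIndices.length][numDistributionKeys + 1];
      }

      void fill(Object[] row) {
        // Evaluate distribution keys once, directly into the first cached row.
        for (int i = 0; i < numDistributionKeys; i++) {
          cachedKeys[0][i] = keyEval[i].evaluate(row);
        }
        for (int i = 0; i < cachedKeys.length; i++) {
          if (i > 0) {
            System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
          }
          // The union and its payload array are created once and reused across rows.
          Union union = (Union) cachedKeys[i][numDistributionKeys];
          if (union == null) {
            union = new Union((byte) i, new Object[distinctColIndices[i].length]);
            cachedKeys[i][numDistributionKeys] = union;
          }
          for (int j = 0; j < distinctColIndices[i].length; j++) {
            union.payload[j] = keyEval[distinctColIndices[i][j]].evaluate(row);
          }
          union.tag = (byte) i;
        }
      }
    }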
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Mon Sep 23 20:40:54 2013
@@ -33,6 +33,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.FetchWork;
@@ -125,8 +126,8 @@ public class SMBMapJoinOperator extends
int bucketSize;
- // For backwards compatibility reasons we honor the older
- // HIVEMAPJOINBUCKETCACHESIZE if set different from default.
+ // For backwards compatibility reasons we honor the older
+ // HIVEMAPJOINBUCKETCACHESIZE if set different from default.
// By hive 0.13 we should remove this code.
int oldVar = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
if (oldVar != 100) {
@@ -186,36 +187,31 @@ public class SMBMapJoinOperator extends
String alias = entry.getKey();
FetchWork fetchWork = entry.getValue();
- Operator<? extends OperatorDesc> forwardOp = aliasToWork.get(alias);
- forwardOp.setExecContext(getExecContext());
+ JobConf jobClone = new JobConf(hconf);
+
+ TableScanOperator ts = (TableScanOperator)aliasToWork.get(alias);
+ // push down projections
+ ColumnProjectionUtils.appendReadColumns(
+ jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
+ // push down filters
+ HiveInputFormat.pushFilters(jobClone, ts);
+
+
+ ts.setExecContext(getExecContext());
- JobConf jobClone = cloneJobConf(hconf, forwardOp);
FetchOperator fetchOp = new FetchOperator(fetchWork, jobClone);
- forwardOp.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()});
+ ts.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()});
fetchOp.clearFetchContext();
DummyStoreOperator sinkOp = aliasToSinkWork.get(alias);
- MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone, forwardOp, sinkOp);
+ MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone, ts, sinkOp);
aliasToMergeQueue.put(alias, mergeQueue);
l4j.info("fetch operators for " + alias + " initialized");
}
}
- private JobConf cloneJobConf(Configuration hconf, Operator<?> op) {
- JobConf jobClone = new JobConf(hconf);
- if (op instanceof TableScanOperator) {
- List<Integer> list = ((TableScanOperator)op).getNeededColumnIDs();
- if (list != null) {
- ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
- }
- } else {
- ColumnProjectionUtils.setFullyReadColumns(jobClone);
- }
- return jobClone;
- }
-
private byte tagForAlias(String alias) {
for (byte tag = 0; tag < tagToAlias.length; tag++) {
if (alias.equals(tagToAlias[tag])) {
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Mon Sep 23 20:40:54 2013
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.exec;
-import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
@@ -31,6 +30,7 @@ import org.apache.hadoop.hive.common.Fil
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -237,18 +237,22 @@ public class TableScanOperator extends O
return "TS";
}
- // this 'neededColumnIDs' field is included in this operator class instead of
+ // This 'neededColumnIDs' field is included in this operator class instead of
// its desc class. The reason is that 1) tableScanDesc cannot be instantiated,
// and 2) it will fail some join and union queries if this is added forcibly
- // into tableScanDesc
- java.util.ArrayList<Integer> neededColumnIDs;
+ // into tableScanDesc.
+ // Both neededColumnIDs and neededColumns should never be null.
+ // When neededColumnIDs is an empty list,
+  // it means no column is needed (e.g. we do not need any column to evaluate
+ // SELECT count(*) FROM t).
+ List<Integer> neededColumnIDs;
List<String> neededColumns;
- public void setNeededColumnIDs(java.util.ArrayList<Integer> orign_columns) {
+ public void setNeededColumnIDs(List<Integer> orign_columns) {
neededColumnIDs = orign_columns;
}
- public java.util.ArrayList<Integer> getNeededColumnIDs() {
+ public List<Integer> getNeededColumnIDs() {
return neededColumnIDs;
}
@@ -324,4 +328,14 @@ public class TableScanOperator extends O
public boolean supportAutomaticSortMergeJoin() {
return true;
}
+
+ @Override
+ public Operator<? extends OperatorDesc> clone()
+ throws CloneNotSupportedException {
+ TableScanOperator ts = (TableScanOperator) super.clone();
+ ts.setNeededColumnIDs(new ArrayList<Integer>(getNeededColumnIDs()));
+ ts.setNeededColumns(new ArrayList<String>(getNeededColumns()));
+ return ts;
+ }
+
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java Mon Sep 23 20:40:54 2013
@@ -25,7 +25,6 @@ import java.io.Serializable;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.text.SimpleDateFormat;
-import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashMap;
@@ -55,6 +54,7 @@ import org.apache.hadoop.hive.ql.exec.Ut
import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionException;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.FetchWork;
@@ -376,20 +376,12 @@ public class MapredLocalTask extends Tas
for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
JobConf jobClone = new JobConf(job);
- Operator<? extends OperatorDesc> tableScan =
- work.getAliasToWork().get(entry.getKey());
- boolean setColumnsNeeded = false;
- if (tableScan instanceof TableScanOperator) {
- ArrayList<Integer> list = ((TableScanOperator) tableScan).getNeededColumnIDs();
- if (list != null) {
- ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
- setColumnsNeeded = true;
- }
- }
-
- if (!setColumnsNeeded) {
- ColumnProjectionUtils.setFullyReadColumns(jobClone);
- }
+ TableScanOperator ts = (TableScanOperator)work.getAliasToWork().get(entry.getKey());
+ // push down projections
+ ColumnProjectionUtils.appendReadColumns(
+ jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
+ // push down filters
+ HiveInputFormat.pushFilters(jobClone, ts);
// create a fetch operator
FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
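After this change the same pushdown prologue appears in SMBMapJoinOperator, MapredLocalTask, and (via pushProjectionsAndFilters) HiveInputFormat: clone the JobConf, append the table scan's needed column IDs and names, and push its filter expressions. A minimal sketch of that shared pattern, using only the calls visible in the hunks above; the helper class and method name are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.ql.exec.TableScanOperator;
    import org.apache.hadoop.hive.ql.io.HiveInputFormat;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
    import org.apache.hadoop.mapred.JobConf;

    // Sketch: prepare a per-alias JobConf with column projections and filter
    // expressions pushed down from a TableScanOperator, as this commit does inline.
    final class PushdownSketch {
      static JobConf prepare(Configuration hconf, TableScanOperator ts) {
        JobConf jobClone = new JobConf(hconf);
        // push down projections; neededColumnIDs/neededColumns are never null now
        ColumnProjectionUtils.appendReadColumns(
            jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
        // push down filters
        HiveInputFormat.pushFilters(jobClone, ts);
        return jobClone;
      }
    }

The commit appears to inline this prologue at each call site instead of keeping the old per-operator cloneJobConf helper, which is possible because the needed-column lists on TableScanOperator are now always populated.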
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Mon Sep 23 20:40:54 2013
@@ -66,16 +66,14 @@ public class BucketizedHiveInputFormat<K
throw new IOException("cannot find class " + inputFormatClassName);
}
- // clone a jobConf for setting needed columns for reading
- JobConf cloneJobConf = new JobConf(job);
- pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
+ pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath()
.toString(), hsplit.getPath().toUri().getPath());
- InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
- cloneJobConf);
- BucketizedHiveRecordReader<K, V> rr= new BucketizedHiveRecordReader(inputFormat, hsplit, cloneJobConf,
+ InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
+
+ BucketizedHiveRecordReader<K, V> rr= new BucketizedHiveRecordReader(inputFormat, hsplit, job,
reporter);
- rr.initIOContext(hsplit, cloneJobConf, inputFormatClass);
+ rr.initIOContext(hsplit, job, inputFormatClass);
return rr;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Mon Sep 23 20:40:54 2013
@@ -217,9 +217,6 @@ public class HiveInputFormat<K extends W
throw new IOException("cannot find class " + inputFormatClassName, e);
}
- // clone a jobConf for setting needed columns for reading
- JobConf cloneJobConf = new JobConf(job);
-
if (this.mrwork == null) {
init(job);
}
@@ -227,22 +224,20 @@ public class HiveInputFormat<K extends W
boolean nonNative = false;
PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString());
if ((part != null) && (part.getTableDesc() != null)) {
- Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
+ Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), job);
nonNative = part.getTableDesc().isNonNative();
}
- pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
+ pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath()
.toString(), hsplit.getPath().toUri().getPath(), nonNative);
- InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
- cloneJobConf);
+ InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
RecordReader innerReader = null;
try {
- innerReader = inputFormat.getRecordReader(inputSplit,
- cloneJobConf, reporter);
+ innerReader = inputFormat.getRecordReader(inputSplit, job, reporter);
} catch (Exception e) {
innerReader = HiveIOExceptionHandlerUtil
- .handleRecordReaderCreationException(e, cloneJobConf);
+ .handleRecordReaderCreationException(e, job);
}
HiveRecordReader<K,V> rr = new HiveRecordReader(innerReader, job);
rr.initIOContext(hsplit, job, inputFormatClass, innerReader);
@@ -420,20 +415,13 @@ public class HiveInputFormat<K extends W
for (String alias : aliases) {
Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(
alias);
- if (op != null && op instanceof TableScanOperator) {
- TableScanOperator tableScan = (TableScanOperator) op;
-
- // push down projections
- ArrayList<Integer> list = tableScan.getNeededColumnIDs();
- if (list != null) {
- ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
- } else {
- ColumnProjectionUtils.setFullyReadColumns(jobConf);
- }
- ColumnProjectionUtils.appendReadColumnNames(jobConf,
- tableScan.getNeededColumns());
-
- pushFilters(jobConf, tableScan);
+ if (op instanceof TableScanOperator) {
+ TableScanOperator ts = (TableScanOperator) op;
+ // push down projections.
+ ColumnProjectionUtils.appendReadColumns(
+ jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns());
+ // push down filters
+ pushFilters(jobConf, ts);
}
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java Mon Sep 23 20:40:54 2013
@@ -27,6 +27,7 @@ import java.io.IOException;
import java.rmi.server.UID;
import java.security.MessageDigest;
import java.util.Arrays;
+import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -1379,27 +1380,22 @@ public class RCFile {
columnNumber = Integer.parseInt(metadata.get(
new Text(COLUMN_NUMBER_METADATA_STR)).toString());
- java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils
+ List<Integer> notSkipIDs = ColumnProjectionUtils
.getReadColumnIDs(conf);
boolean[] skippedColIDs = new boolean[columnNumber];
- if (notSkipIDs.size() > 0) {
- for (int i = 0; i < skippedColIDs.length; i++) {
- skippedColIDs[i] = true;
- }
+ if(ColumnProjectionUtils.isReadAllColumns(conf)) {
+ Arrays.fill(skippedColIDs, false);
+ } else if (notSkipIDs.size() > 0) {
+ Arrays.fill(skippedColIDs, true);
for (int read : notSkipIDs) {
if (read < columnNumber) {
skippedColIDs[read] = false;
}
}
} else {
- // TODO: if no column name is specified e.g, in select count(1) from tt;
- // skip all columns, this should be distinguished from the case:
- // select * from tt;
- for (int i = 0; i < skippedColIDs.length; i++) {
- skippedColIDs[i] = false;
- }
+ // select count(1)
+ Arrays.fill(skippedColIDs, true);
}
-
loadColumnNum = columnNumber;
if (skippedColIDs.length > 0) {
for (boolean skippedColID : skippedColIDs) {
@@ -1584,7 +1580,7 @@ public class RCFile {
byte[] buffer = new byte[prefix+n];
n = (int)Math.min(n, end - in.getPos());
/* fill array with a pattern that will never match sync */
- Arrays.fill(buffer, (byte)(~sync[0]));
+ Arrays.fill(buffer, (byte)(~sync[0]));
while(n > 0 && (in.getPos() + n) <= end) {
position = in.getPos();
in.readFully(buffer, prefix, n);
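The first RCFile hunk distinguishes three projection states now encoded by ColumnProjectionUtils: read all columns, read an explicit list of columns, or read no columns at all (the select count(1) case, which the old code could not tell apart from select *). A short sketch of deriving a skip mask from a Configuration with the calls used in this commit (isReadAllColumns, getReadColumnIDs); the wrapper class is hypothetical:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    // Sketch: build a per-column skip mask the way the patched RCFile reader does.
    final class SkipMaskSketch {
      static boolean[] skipMask(Configuration conf, int columnNumber) {
        boolean[] skippedColIDs = new boolean[columnNumber];
        List<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf);
        if (ColumnProjectionUtils.isReadAllColumns(conf)) {
          Arrays.fill(skippedColIDs, false);      // e.g. select * from tt
        } else if (!notSkipIDs.isEmpty()) {
          Arrays.fill(skippedColIDs, true);       // read only the listed columns
          for (int read : notSkipIDs) {
            if (read < columnNumber) {
              skippedColIDs[read] = false;
            }
          }
        } else {
          Arrays.fill(skippedColIDs, true);       // e.g. select count(1) from tt
        }
        return skippedColIDs;
      }
    }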
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java Mon Sep 23 20:40:54 2013
@@ -19,14 +19,14 @@
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
-import java.util.Map;
-import java.util.ArrayList;
import java.util.Collections;
+import java.util.Map;
import java.util.WeakHashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer;
import org.apache.hadoop.hive.ql.io.RCFile.Reader;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
@@ -35,8 +35,6 @@ import org.apache.hadoop.mapred.FileSpli
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-
/**
* RCFileRecordReader.
*
@@ -71,7 +69,7 @@ public class RCFileRecordReader<K extend
public void put(FileSplit split, long endSync) {
Path path = split.getPath();
- long end = split.getStart() + split.getLength();
+ long end = split.getStart() + split.getLength();
String key = path.toString()+"+"+String.format("%d",end);
RCFileSyncEntry entry = new RCFileSyncEntry();
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java Mon Sep 23 20:40:54 2013
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.io.avr
import java.io.IOException;
+import java.rmi.server.UID;
import java.util.Map;
import java.util.Properties;
@@ -57,6 +58,10 @@ public class AvroGenericRecordReader imp
final private long start;
final private long stop;
protected JobConf jobConf;
+ /**
+ * A unique ID for each record reader.
+ */
+ final private UID recordReaderID;
public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
this.jobConf = job;
@@ -78,6 +83,7 @@ public class AvroGenericRecordReader imp
this.reader.sync(split.getStart());
this.start = reader.tell();
this.stop = split.getStart() + split.getLength();
+ this.recordReaderID = new UID();
}
/**
@@ -148,6 +154,7 @@ public class AvroGenericRecordReader imp
GenericData.Record r = (GenericData.Record)reader.next();
record.setRecord(r);
+ record.setRecordReaderID(recordReaderID);
return true;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Mon Sep 23 20:40:54 2013
@@ -20,9 +20,13 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
-
-import com.google.common.collect.Lists;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -51,14 +55,6 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-
/**
* A MapReduce/Hive input format for ORC files.
*/
@@ -125,7 +121,7 @@ public class OrcInputFormat implements
} else {
LOG.info("No ORC pushdown predicate");
}
- this.reader = file.rows(offset, length,includeColumn, sarg, columnNames);
+ this.reader = file.rows(offset, length, includeColumn, sarg, columnNames);
this.offset = offset;
this.length = length;
}
@@ -199,9 +195,7 @@ public class OrcInputFormat implements
*/
static boolean[] findIncludedColumns(List<OrcProto.Type> types,
Configuration conf) {
- String includedStr =
- conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
- if (includedStr == null || includedStr.trim().length() == 0) {
+ if (ColumnProjectionUtils.isReadAllColumns(conf)) {
return null;
} else {
int numColumns = types.size();
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Mon Sep 23 20:40:54 2013
@@ -301,7 +301,7 @@ public final class ColumnPrunerProcFacto
.genColLists((Operator<? extends OperatorDesc>) nd);
cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd,
cols);
- ArrayList<Integer> needed_columns = new ArrayList<Integer>();
+ List<Integer> neededColumnIds = new ArrayList<Integer>();
List<String> neededColumnNames = new ArrayList<String>();
RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
TableScanDesc desc = scanOp.getConf();
@@ -332,15 +332,15 @@ public final class ColumnPrunerProcFacto
continue;
}
int position = inputRR.getPosition(cols.get(i));
- if (position >=0) {
+ if (position >= 0) {
// get the needed columns by id and name
- needed_columns.add(position);
+ neededColumnIds.add(position);
neededColumnNames.add(cols.get(i));
}
}
desc.setVirtualCols(newVirtualCols);
- scanOp.setNeededColumnIDs(needed_columns);
+ scanOp.setNeededColumnIDs(neededColumnIds);
scanOp.setNeededColumns(neededColumnNames);
return null;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Mon Sep 23 20:40:54 2013
@@ -64,7 +64,6 @@ import org.apache.hadoop.hive.ql.plan.Op
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.StatsWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.InputFormat;
@@ -299,7 +298,7 @@ public class GenMRFileSink1 implements N
// Create a TableScan operator
RowSchema inputRS = fsInput.getSchema();
Operator<? extends OperatorDesc> tsMerge =
- OperatorFactory.get(TableScanDesc.class, inputRS);
+ GenMapRedUtils.createTemporaryTableScanOperator(inputRS);
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Mon Sep 23 20:40:54 2013
@@ -19,15 +19,12 @@
package org.apache.hadoop.hive.ql.optimizer;
import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Map;
import java.util.Stack;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -41,12 +38,10 @@ import org.apache.hadoop.hive.ql.optimiz
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcFactory;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
/**
* Processor for the rule - TableScan followed by Union.
@@ -131,33 +126,15 @@ public class GenMRUnion1 implements Node
Context baseCtx = parseCtx.getContext();
String taskTmpDir = baseCtx.getMRTmpFileURI();
- // Create a file sink operator for this file name
- Operator<? extends OperatorDesc> fs_op = OperatorFactory.get(
- new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
- HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());
-
- assert parent.getChildOperators().size() == 1;
- parent.getChildOperators().set(0, fs_op);
-
- List<Operator<? extends OperatorDesc>> parentOpList =
- new ArrayList<Operator<? extends OperatorDesc>>();
- parentOpList.add(parent);
- fs_op.setParentOperators(parentOpList);
-
- // Create a dummy table scan operator
- Operator<? extends OperatorDesc> ts_op = OperatorFactory.get(
- new TableScanDesc(), parent.getSchema());
- List<Operator<? extends OperatorDesc>> childOpList =
- new ArrayList<Operator<? extends OperatorDesc>>();
- childOpList.add(child);
- ts_op.setChildOperators(childOpList);
- child.replaceParent(parent, ts_op);
+    // Create the temporary file, its corresponding FileSinkOperator, and
+ // its corresponding TableScanOperator.
+ TableScanOperator tableScanOp =
+ GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
// Add the path to alias mapping
-
uCtxTask.addTaskTmpDir(taskTmpDir);
uCtxTask.addTTDesc(tt_desc);
- uCtxTask.addListTopOperators(ts_op);
+ uCtxTask.addListTopOperators(tableScanOp);
// The union task is empty. The files created for all the inputs are
// assembled in the union context and later used to initialize the union
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Sep 23 20:40:54 2013
@@ -34,12 +34,14 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.DemuxOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -864,6 +866,70 @@ public final class GenMapRedUtils {
return op;
}
+ public static TableScanOperator createTemporaryTableScanOperator(RowSchema rowSchema) {
+ TableScanOperator tableScanOp =
+ (TableScanOperator) OperatorFactory.get(new TableScanDesc(), rowSchema);
+ // Set needed columns for this dummy TableScanOperator
+ List<Integer> neededColumnIds = new ArrayList<Integer>();
+ List<String> neededColumnNames = new ArrayList<String>();
+ List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
+ for (int i = 0 ; i < parentColumnInfos.size(); i++) {
+ neededColumnIds.add(i);
+ neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
+ }
+ tableScanOp.setNeededColumnIDs(neededColumnIds);
+ tableScanOp.setNeededColumns(neededColumnNames);
+ return tableScanOp;
+ }
+
+ /**
+   * Break the pipeline between parent and child, and then
+   * output the data generated by parent to a temporary file stored in taskTmpDir.
+   * A FileSinkOperator is added after parent to write the data.
+   * Before child, we add a TableScanOperator to load the data back from the
+   * temporary file.
+ * @param parent
+ * @param child
+ * @param taskTmpDir
+ * @param tt_desc
+ * @param parseCtx
+ * @return The TableScanOperator inserted before child.
+ */
+ protected static TableScanOperator createTemporaryFile(
+ Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child,
+ String taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) {
+
+ // Create a FileSinkOperator for the file name of taskTmpDir
+ boolean compressIntermediate =
+ parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
+ FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
+ if (compressIntermediate) {
+ desc.setCompressCodec(parseCtx.getConf().getVar(
+ HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
+ desc.setCompressType(parseCtx.getConf().getVar(
+ HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
+ }
+ Operator<? extends OperatorDesc> fileSinkOp = putOpInsertMap(OperatorFactory
+ .get(desc, parent.getSchema()), null, parseCtx);
+
+ // Connect parent to fileSinkOp
+ parent.replaceChild(child, fileSinkOp);
+ fileSinkOp.setParentOperators(Utilities.makeList(parent));
+
+ // Create a dummy TableScanOperator for the file generated through fileSinkOp
+ RowResolver parentRowResolver =
+ parseCtx.getOpParseCtx().get(parent).getRowResolver();
+ TableScanOperator tableScanOp = (TableScanOperator) putOpInsertMap(
+ createTemporaryTableScanOperator(parent.getSchema()),
+ parentRowResolver, parseCtx);
+
+ // Connect this TableScanOperator to child.
+ tableScanOp.setChildOperators(Utilities.makeList(child));
+ child.replaceParent(parent, tableScanOp);
+
+ return tableScanOp;
+ }
+
@SuppressWarnings("nls")
/**
* Split two tasks by creating a temporary file between them.
@@ -891,7 +957,7 @@ public final class GenMapRedUtils {
rootTasks.remove(childTask);
}
- // generate the temporary file
+ // Generate the temporary file name
Context baseCtx = parseCtx.getContext();
String taskTmpDir = baseCtx.getMRTmpFileURI();
@@ -899,55 +965,18 @@ public final class GenMapRedUtils {
TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
- // Create a file sink operator for this file name
- boolean compressIntermediate = parseCtx.getConf().getBoolVar(
- HiveConf.ConfVars.COMPRESSINTERMEDIATE);
- FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc,
- compressIntermediate);
- if (compressIntermediate) {
- desc.setCompressCodec(parseCtx.getConf().getVar(
- HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
- desc.setCompressType(parseCtx.getConf().getVar(
- HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
- }
- Operator<? extends OperatorDesc> fs_op = putOpInsertMap(OperatorFactory
- .get(desc, parent.getSchema()), null, parseCtx);
-
- // replace the reduce child with this operator
- List<Operator<? extends OperatorDesc>> childOpList = parent
- .getChildOperators();
- for (int pos = 0; pos < childOpList.size(); pos++) {
- if (childOpList.get(pos) == op) {
- childOpList.set(pos, fs_op);
- break;
- }
- }
-
- List<Operator<? extends OperatorDesc>> parentOpList =
- new ArrayList<Operator<? extends OperatorDesc>>();
- parentOpList.add(parent);
- fs_op.setParentOperators(parentOpList);
-
- // create a dummy tableScan operator on top of op
- // TableScanOperator is implicitly created here for each MapOperator
- RowResolver rowResolver = opProcCtx.getParseCtx().getOpParseCtx().get(parent).getRowResolver();
- Operator<? extends OperatorDesc> ts_op = putOpInsertMap(OperatorFactory
- .get(TableScanDesc.class, parent.getSchema()), rowResolver, parseCtx);
-
- childOpList = new ArrayList<Operator<? extends OperatorDesc>>();
- childOpList.add(op);
- ts_op.setChildOperators(childOpList);
- op.getParentOperators().set(0, ts_op);
+    // Create the temporary file, its corresponding FileSinkOperator, and
+ // its corresponding TableScanOperator.
+ TableScanOperator tableScanOp =
+ createTemporaryFile(parent, op, taskTmpDir, tt_desc, parseCtx);
Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx =
opProcCtx.getMapCurrCtx();
- mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null));
+ mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
String streamDesc = taskTmpDir;
MapredWork cplan = (MapredWork) childTask.getWork();
- Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
-
if (needsTagging(cplan.getReduceWork())) {
String origStreamDesc;
streamDesc = "$INTNAME";
@@ -963,7 +992,7 @@ public final class GenMapRedUtils {
}
// Add the path to alias mapping
- setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan.getMapWork(), false, tt_desc);
+ setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
opProcCtx.setCurrTopOp(null);
opProcCtx.setCurrAliasId(null);
opProcCtx.setCurrTask(childTask);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Mon Sep 23 20:40:54 2013
@@ -61,9 +61,7 @@ public class Optimizer {
transformations.add(new ListBucketingPruner());
}
}
- if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
- transformations.add(new ColumnPruner());
- }
+ transformations.add(new ColumnPruner());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) {
transformations.add(new SkewJoinOptimizer());
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java Mon Sep 23 20:40:54 2013
@@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin;
@@ -54,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.Op
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -157,6 +158,7 @@ public final class GenMRSkewJoinProcesso
.getProperties());
Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
+ Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
// used for create mapJoinDesc, should be in order
@@ -174,13 +176,17 @@ public final class GenMRSkewJoinProcesso
int columnSize = valueCols.size();
List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
+ ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
boolean first = true;
for (int k = 0; k < columnSize; k++) {
TypeInfo type = valueCols.get(k).getTypeInfo();
String newColName = i + "_VALUE_" + k; // any name, it does not matter.
- newValueExpr
- .add(new ExprNodeColumnDesc(type, newColName, "" + i, false));
+ ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
+ columnInfos.add(columnInfo);
+ newValueExpr.add(new ExprNodeColumnDesc(
+ columnInfo.getType(), columnInfo.getInternalName(),
+ columnInfo.getTabAlias(), false));
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
@@ -199,14 +205,18 @@ public final class GenMRSkewJoinProcesso
first = false;
colNames = colNames + joinKeys.get(k);
colTypes = colTypes + joinKeyTypes.get(k);
- newKeyExpr.add(new ExprNodeColumnDesc(TypeInfoFactory
- .getPrimitiveTypeInfo(joinKeyTypes.get(k)), joinKeys.get(k),
- "" + i, false));
+ ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory
+ .getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
+ columnInfos.add(columnInfo);
+ newKeyExpr.add(new ExprNodeColumnDesc(
+ columnInfo.getType(), columnInfo.getInternalName(),
+ columnInfo.getTabAlias(), false));
}
newJoinValues.put(alias, newValueExpr);
newJoinKeys.put(alias, newKeyExpr);
tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
+ rowSchemaList.put(alias, new RowSchema(columnInfos));
// construct value table Desc
String valueColNames = "";
@@ -243,8 +253,8 @@ public final class GenMRSkewJoinProcesso
Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
for (int k = 0; k < tags.length; k++) {
- Operator<? extends OperatorDesc> ts = OperatorFactory.get(
- TableScanDesc.class, (RowSchema) null);
+ Operator<? extends OperatorDesc> ts =
+ GenMapRedUtils.createTemporaryTableScanOperator(rowSchemaList.get((byte)k));
((TableScanOperator)ts).setTableDesc(tableDescList.get((byte)k));
parentOps[k] = ts;
}
@@ -256,13 +266,9 @@ public final class GenMRSkewJoinProcesso
String bigKeyDirPath = bigKeysDirMap.get(src);
newPlan.getPathToAliases().put(bigKeyDirPath, aliases);
-
-
-
newPlan.getAliasToWork().put(alias, tblScan_op);
PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);
-
newPlan.getPathToPartitionInfo().put(bigKeyDirPath, part);
newPlan.getAliasToPartnInfo().put(alias, part);
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java Mon Sep 23 20:40:54 2013
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.NullStructSerDe;
/**
@@ -128,12 +129,15 @@ public class MetadataOnlyOptimizer imple
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TableScanOperator node = (TableScanOperator) nd;
+ TableScanOperator tsOp = (TableScanOperator) nd;
WalkerCtx walkerCtx = (WalkerCtx) procCtx;
- if (((node.getNeededColumnIDs() == null) || (node.getNeededColumnIDs().size() == 0))
- && ((node.getConf() == null) ||
- (node.getConf().getVirtualCols() == null) ||
- (node.getConf().getVirtualCols().isEmpty()))) {
- walkerCtx.setMayBeMetadataOnly(node);
+ List<Integer> colIDs = tsOp.getNeededColumnIDs();
+ TableScanDesc desc = tsOp.getConf();
+ boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
+ boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
+ || (desc.getVirtualCols().isEmpty());
+ if (noColNeeded && noVCneeded) {
+ walkerCtx.setMayBeMetadataOnly(tsOp);
}
return nd;
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java Mon Sep 23 20:40:54 2013
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.io;
+import static org.junit.Assert.assertEquals;
+
import java.io.IOException;
import java.util.Random;
@@ -32,7 +34,6 @@ import org.apache.hadoop.io.SequenceFile
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
-import static org.junit.Assert.*;
/**
* PerformTestRCFileAndSeqFile.
@@ -300,7 +301,7 @@ public class PerformTestRCFileAndSeqFile
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(Integer.valueOf(0));
- ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
+ ColumnProjectionUtils.appendReadColumns(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -335,7 +336,7 @@ public class PerformTestRCFileAndSeqFile
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(Integer.valueOf(0));
readCols.add(Integer.valueOf(allColumnsNumber - 1));
- ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
+ ColumnProjectionUtils.appendReadColumns(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -370,7 +371,7 @@ public class PerformTestRCFileAndSeqFile
int actualReadCount = 0;
- ColumnProjectionUtils.setFullyReadColumns(conf);
+ ColumnProjectionUtils.setReadAllColumns(conf);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java Mon Sep 23 20:40:54 2013
@@ -18,6 +18,14 @@
package org.apache.hadoop.hive.ql.io;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -28,8 +36,6 @@ import java.util.List;
import java.util.Properties;
import java.util.Random;
-import static org.junit.Assert.*;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -58,11 +64,11 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.FileSplit;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -83,20 +89,21 @@ public class TestRCFile {
// Data
- private Writable[] expectedFieldsData = {
+ private final Writable[] expectedFieldsData = {
new ByteWritable((byte) 123), new ShortWritable((short) 456),
new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
new Text("hive and hadoop"), null, null};
- private Object[] expectedPartitalFieldsData = {null, null,
+ private final Object[] expectedPartitalFieldsData = {null, null,
new IntWritable(789), new LongWritable(1000), null, null, null, null};
- private BytesRefArrayWritable patialS = new BytesRefArrayWritable();
+ private final BytesRefArrayWritable patialS = new BytesRefArrayWritable();
private byte[][] bytesArray;
private BytesRefArrayWritable s;
@Before
public void setup() throws Exception {
conf = new Configuration();
+ ColumnProjectionUtils.setReadAllColumns(conf);
fs = FileSystem.getLocal(conf);
dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred");
file = new Path(dir, "test_rcfile");
@@ -511,7 +518,7 @@ public class TestRCFile {
throws IOException, SerDeException {
LOG.debug("reading " + count + " records");
long start = System.currentTimeMillis();
- ColumnProjectionUtils.setFullyReadColumns(conf);
+ ColumnProjectionUtils.setReadAllColumns(conf);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -556,7 +563,7 @@ public class TestRCFile {
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(Integer.valueOf(2));
readCols.add(Integer.valueOf(3));
- ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
+ ColumnProjectionUtils.appendReadColumns(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Mon Sep 23 20:40:54 2013
@@ -17,6 +17,20 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -30,6 +44,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -54,21 +69,6 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
public class TestInputOutputFormat {
Path workDir = new Path(System.getProperty("test.tmp.dir","target/test/tmp"));
@@ -574,7 +574,7 @@ public class TestInputOutputFormat {
reader.close();
// read just the first column
- conf.set("hive.io.file.readcolumn.ids", "0");
+ ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
key = reader.createKey();
value = (Writable) reader.createValue();
@@ -589,7 +589,7 @@ public class TestInputOutputFormat {
reader.close();
// test the mapping of empty string to all columns
- conf.set("hive.io.file.readcolumn.ids", "");
+ ColumnProjectionUtils.setReadAllColumns(conf);
reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
key = reader.createKey();
value = (Writable) reader.createValue();
@@ -655,7 +655,7 @@ public class TestInputOutputFormat {
FileInputFormat.setInputPaths(conf, testFilePath.toString());
InputSplit[] splits = in.getSplits(conf, 1);
assertEquals(1, splits.length);
- conf.set("hive.io.file.readcolumn.ids", "1");
+ ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
org.apache.hadoop.mapred.RecordReader reader =
in.getRecordReader(splits[0], conf, Reporter.NULL);
Object key = reader.createKey();
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q Mon Sep 23 20:40:54 2013
@@ -1,7 +1,7 @@
set hive.archive.enabled = true;
set hive.enforce.bucketing = true;
--- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
drop table tstsrc;
drop table tstsrcpart;
Modified: hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q (original)
+++ hive/branches/vectorization/ql/src/test/queries/clientpositive/binary_table_colserde.q Mon Sep 23 20:40:54 2013
@@ -3,6 +3,7 @@ drop table ba_test;
-- Everything in ba_table1.q + columnar serde in RCFILE.
create table ba_test (ba_key binary, ba_val binary) stored as rcfile;
+alter table ba_test set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
describe extended ba_test;
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out Mon Sep 23 20:40:54 2013
@@ -35,11 +35,13 @@ POSTHOOK: Lineage: tstsrc.value SIMPLE [
PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11')
select key, value from srcpart where ds='2008-04-08' and hr='11'
PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11
POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11')
select key, value from srcpart where ds='2008-04-08' and hr='11'
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11
POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -49,11 +51,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12')
select key, value from srcpart where ds='2008-04-08' and hr='12'
PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12
POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12')
select key, value from srcpart where ds='2008-04-08' and hr='12'
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12
POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -65,11 +69,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11')
select key, value from srcpart where ds='2008-04-09' and hr='11'
PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11
POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11')
select key, value from srcpart where ds='2008-04-09' and hr='11'
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11
POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -83,11 +89,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12')
select key, value from srcpart where ds='2008-04-09' and hr='12'
PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12
POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12')
select key, value from srcpart where ds='2008-04-09' and hr='12'
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12
POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -103,12 +111,14 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
@@ -144,12 +154,14 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
@@ -166,10 +178,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
48479881068
PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key
PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -187,12 +201,14 @@ PREHOOK: query: SELECT * FROM tstsrcpart
WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'
PREHOOK: type: QUERY
PREHOOK: Input: default@tstsrc
+PREHOOK: Input: default@tstsrcpart
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key
WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tstsrc
+POSTHOOK: Input: default@tstsrcpart
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -235,12 +251,14 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
PREHOOK: type: QUERY
+PREHOOK: Input: default@tstsrcpart
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tstsrcpart
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
@@ -295,10 +313,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
PREHOOK: type: QUERY
+PREHOOK: Input: default@harbucket
PREHOOK: Input: default@harbucket@ds=1
#### A masked pattern was here ####
POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@harbucket
POSTHOOK: Input: default@harbucket@ds=1
#### A masked pattern was here ####
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -339,10 +359,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
PREHOOK: type: QUERY
+PREHOOK: Input: default@harbucket
PREHOOK: Input: default@harbucket@ds=1
#### A masked pattern was here ####
POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@harbucket
POSTHOOK: Input: default@harbucket@ds=1
#### A masked pattern was here ####
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -383,10 +405,12 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
PREHOOK: type: QUERY
+PREHOOK: Input: default@harbucket
PREHOOK: Input: default@harbucket@ds=1
#### A masked pattern was here ####
POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@harbucket
POSTHOOK: Input: default@harbucket@ds=1
#### A masked pattern was here ####
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -467,11 +491,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2
PREHOOK: type: QUERY
+PREHOOK: Input: default@old_name
PREHOOK: Input: default@old_name@ds=1
#### A masked pattern was here ####
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@old_name
POSTHOOK: Input: default@old_name@ds=1
#### A masked pattern was here ####
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
@@ -511,11 +537,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(
PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2
PREHOOK: type: QUERY
+PREHOOK: Input: default@new_name
PREHOOK: Input: default@new_name@ds=1
#### A masked pattern was here ####
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2
POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_name
POSTHOOK: Input: default@new_name@ds=1
#### A masked pattern was here ####
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ]
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join0.q.out Mon Sep 23 20:40:54 2013
@@ -154,6 +154,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join15.q.out Mon Sep 23 20:40:54 2013
@@ -126,6 +126,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18.q.out Mon Sep 23 20:40:54 2013
@@ -101,6 +101,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
$INTNAME
+ TableScan
Reduce Output Operator
key expressions:
expr: _col0
@@ -116,6 +117,7 @@ STAGE PLANS:
expr: _col1
type: bigint
$INTNAME1
+ TableScan
Reduce Output Operator
key expressions:
expr: _col0
@@ -167,6 +169,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out Mon Sep 23 20:40:54 2013
@@ -109,6 +109,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
$INTNAME
+ TableScan
Reduce Output Operator
key expressions:
expr: _col0
@@ -126,6 +127,7 @@ STAGE PLANS:
expr: _col2
type: bigint
$INTNAME1
+ TableScan
Reduce Output Operator
key expressions:
expr: _col0
@@ -179,6 +181,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join20.q.out Mon Sep 23 20:40:54 2013
@@ -184,6 +184,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1
@@ -213,7 +214,6 @@ STAGE PLANS:
Fetch Operator
limit: -1
-
PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3))
from (
SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3
@@ -419,6 +419,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1
@@ -448,7 +449,6 @@ STAGE PLANS:
Fetch Operator
limit: -1
-
PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3))
from (
SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out?rev=1525692&r1=1525691&r2=1525692&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/auto_join27.q.out Mon Sep 23 20:40:54 2013
@@ -201,6 +201,7 @@ STAGE PLANS:
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
+ TableScan
Reduce Output Operator
sort order:
tag: -1