Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/05 08:01:58 UTC
svn commit: r1538880 [5/46] - in /hive/branches/tez: ./ ant/
ant/src/org/apache/hadoop/hive/ant/ beeline/
beeline/src/java/org/apache/hive/beeline/ beeline/src/main/
beeline/src/test/org/apache/hive/beeline/src/test/ cli/ common/
common/src/java/conf/ ...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java Tue Nov 5 07:01:32 2013
@@ -19,12 +19,14 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TopNHash;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
@@ -39,6 +41,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
public class VectorReduceSinkOperator extends ReduceSinkOperator {
@@ -51,42 +54,44 @@ public class VectorReduceSinkOperator ex
* The evaluators for the key columns. Key columns decide the sort order on
* the reducer side. Key columns are passed to the reducer in the "key".
*/
- protected VectorExpression[] keyEval;
+ private VectorExpression[] keyEval;
/**
* The key value writers. These know how to write the necessary writable type
* based on key column metadata, from the primitive vector type.
*/
- protected transient VectorExpressionWriter[] keyWriters;
+ private transient VectorExpressionWriter[] keyWriters;
/**
* The evaluators for the value columns. Value columns are passed to reducer
* in the "value".
*/
- protected VectorExpression[] valueEval;
+ private VectorExpression[] valueEval;
/**
* The output value writers. These know how to write the necessary writable type
* based on value column metadata, from the primitive vector type.
*/
- protected transient VectorExpressionWriter[] valueWriters;
+ private transient VectorExpressionWriter[] valueWriters;
/**
* The evaluators for the partition columns (CLUSTER BY or DISTRIBUTE BY in
* Hive language). Partition columns decide the reducer that the current row
* goes to. Partition columns are not passed to reducer.
*/
- protected VectorExpression[] partitionEval;
+ private VectorExpression[] partitionEval;
/**
* The partition value writers. These know how to write the necessary writable type
* based on partition column metadata, from the primitive vector type.
*/
- protected transient VectorExpressionWriter[] partitionWriters;
+ private transient VectorExpressionWriter[] partitionWriters;
- transient ObjectInspector keyObjectInspector;
- transient ObjectInspector valueObjectInspector;
- transient int [] keyHashCode = new int [VectorizedRowBatch.DEFAULT_SIZE];
+ private transient ObjectInspector keyObjectInspector;
+ private transient ObjectInspector valueObjectInspector;
+ private transient int [] keyHashCode = new int [VectorizedRowBatch.DEFAULT_SIZE];
+
+ private transient int[] hashResult; // the pre-created array for reducerHash results
public VectorReduceSinkOperator(VectorizationContext vContext, OperatorDesc conf)
throws HiveException {
@@ -183,6 +188,11 @@ public class VectorReduceSinkOperator ex
tagByte[0] = (byte) tag;
LOG.info("Using tag = " + tag);
+ int limit = conf.getTopN();
+ float memUsage = conf.getTopNMemoryUsage();
+ if (limit >= 0 && memUsage > 0) {
+ reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this);
+ }
} catch(Exception e) {
throw new HiveException(e);
}
@@ -215,21 +225,22 @@ public class VectorReduceSinkOperator ex
Object[] distributionKeys = new Object[numDistributionKeys];
- // Emit a (k,v) pair for each row in the batch
- //
+ // Determine which rows we need to emit based on topN optimization
+ int startResult = reducerHash.startVectorizedBatch();
+ if (startResult == TopNHash.EXCLUDED) {
+ return; // TopN wants us to exclude all rows.
+ }
+ boolean useTopN = startResult != TopNHash.FORWARD;
+ if (useTopN && (hashResult == null || hashResult.length < vrg.size)) {
+ hashResult = new int[Math.max(vrg.size, VectorizedRowBatch.DEFAULT_SIZE)];
+ }
+
for (int j = 0 ; j < vrg.size; ++j) {
int rowIndex = j;
if (vrg.selectedInUse) {
rowIndex = vrg.selected[j];
}
- for (int i = 0; i < valueEval.length; i++) {
- int batchColumn = valueEval[i].getOutputColumn();
- ColumnVector vectorColumn = vrg.cols[batchColumn];
- cachedValues[i] = valueWriters[i].writeValue(vectorColumn, rowIndex);
- }
- // Serialize the value
- value = valueSerializer.serialize(cachedValues, valueObjectInspector);
-
+ // First, evaluate the key - the way things stand we'd need it regardless.
for (int i = 0; i < keyEval.length; i++) {
int batchColumn = keyEval[i].getOutputColumn();
ColumnVector vectorColumn = vrg.cols[batchColumn];
@@ -237,69 +248,42 @@ public class VectorReduceSinkOperator ex
}
// no distinct key
System.arraycopy(distributionKeys, 0, cachedKeys[0], 0, numDistributionKeys);
- // Serialize the keys and append the tag
+ // TopN is not supported for multi-distinct currently. If we have more than one
+ // cachedKey per input key, horrible things will happen (likely an out-of-bounds array error).
+ assert !useTopN || cachedKeys.length <= 1;
for (int i = 0; i < cachedKeys.length; i++) {
- if (keyIsText) {
- Text key = (Text) keySerializer.serialize(cachedKeys[i],
- keyObjectInspector);
- if (tag == -1) {
- keyWritable.set(key.getBytes(), 0, key.getLength());
- } else {
- int keyLength = key.getLength();
- keyWritable.setSize(keyLength + 1);
- System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
- keyWritable.get()[keyLength] = tagByte[0];
- }
- } else {
- // Must be BytesWritable
- BytesWritable key = (BytesWritable) keySerializer.serialize(
- cachedKeys[i], keyObjectInspector);
- if (tag == -1) {
- keyWritable.set(key.getBytes(), 0, key.getLength());
- } else {
- int keyLength = key.getLength();
- keyWritable.setSize(keyLength + 1);
- System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
- keyWritable.get()[keyLength] = tagByte[0];
- }
- }
- // Evaluate the HashCode
- int keyHashCode = 0;
- if (partitionEval.length == 0) {
- // If no partition cols, just distribute the data uniformly to provide
- // better
- // load balance. If the requirement is to have a single reducer, we
- // should set
- // the number of reducers to 1.
- // Use a constant seed to make the code deterministic.
- if (random == null) {
- random = new Random(12345);
- }
- keyHashCode = random.nextInt();
+ // Serialize the keys and append the tag.
+ Object keyObj = keySerializer.serialize(cachedKeys[i], keyObjectInspector);
+ setKeyWritable(keyIsText ? (Text)keyObj : (BytesWritable)keyObj, tag);
+ if (useTopN) {
+ reducerHash.tryStoreVectorizedKey(keyWritable, j, hashResult);
} else {
- for (int p = 0; p < partitionEval.length; p++) {
- ColumnVector columnVector = vrg.cols[partitionEval[p].getOutputColumn()];
- Object partitionValue = partitionWriters[p].writeValue(columnVector, rowIndex);
- keyHashCode = keyHashCode
- * 31
- + ObjectInspectorUtils.hashCode(
- partitionValue,
- partitionWriters[p].getObjectInspector());
- }
- }
- keyWritable.setHashCode(keyHashCode);
- if (out != null) {
- out.collect(keyWritable, value);
- // Since this is a terminal operator, update counters explicitly -
- // forward is not called
- if (counterNameToEnum != null) {
- ++outputRows;
- if (outputRows % 1000 == 0) {
- incrCounter(numOutputRowsCntr, outputRows);
- outputRows = 0;
- }
- }
- }
+ // No TopN, just forward the key
+ keyWritable.setHashCode(computeHashCode(vrg, rowIndex));
+ collect(keyWritable, makeValueWritable(vrg, rowIndex));
+ }
+ }
+ }
+
+ if (!useTopN) return; // All done.
+
+ // If we use topN, we have called tryStore on every key now. We can process the results.
+ for (int j = 0 ; j < vrg.size; ++j) {
+ int index = hashResult[j];
+ if (index == TopNHash.EXCLUDED) continue;
+ int rowIndex = j;
+ if (vrg.selectedInUse) {
+ rowIndex = vrg.selected[j];
+ }
+ // Compute everything now - we'd either store it, or forward it.
+ int hashCode = computeHashCode(vrg, rowIndex);
+ BytesWritable value = makeValueWritable(vrg, rowIndex);
+ if (index < 0) {
+ // Kinda hacky; see getVectorizedKeyToForward javadoc.
+ byte[] key = reducerHash.getVectorizedKeyToForward(index);
+ collect(key, value, hashCode);
+ } else {
+ reducerHash.storeValue(index, value, hashCode, true);
}
}
} catch (SerDeException e) {
@@ -309,6 +293,45 @@ public class VectorReduceSinkOperator ex
}
}
+ private BytesWritable makeValueWritable(VectorizedRowBatch vrg, int rowIndex)
+ throws HiveException, SerDeException {
+ for (int i = 0; i < valueEval.length; i++) {
+ int batchColumn = valueEval[i].getOutputColumn();
+ ColumnVector vectorColumn = vrg.cols[batchColumn];
+ cachedValues[i] = valueWriters[i].writeValue(vectorColumn, rowIndex);
+ }
+ // Serialize the value
+ return (BytesWritable)valueSerializer.serialize(cachedValues, valueObjectInspector);
+ }
+
+ private int computeHashCode(VectorizedRowBatch vrg, int rowIndex) throws HiveException {
+ // Evaluate the HashCode
+ int keyHashCode = 0;
+ if (partitionEval.length == 0) {
+ // If there are no partition columns, just distribute the data uniformly
+ // to provide better load balance. If the requirement is to have a single
+ // reducer, the number of reducers should be set to 1.
+ // Use a constant seed to make the code deterministic.
+ if (random == null) {
+ random = new Random(12345);
+ }
+ keyHashCode = random.nextInt();
+ } else {
+ for (int p = 0; p < partitionEval.length; p++) {
+ ColumnVector columnVector = vrg.cols[partitionEval[p].getOutputColumn()];
+ Object partitionValue = partitionWriters[p].writeValue(columnVector, rowIndex);
+ keyHashCode = keyHashCode
+ * 31
+ + ObjectInspectorUtils.hashCode(
+ partitionValue,
+ partitionWriters[p].getObjectInspector());
+ }
+ }
+ return keyHashCode;
+ }
+
static public String getOperatorName() {
return "RS";
}
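For illustration, here is a minimal, self-contained sketch (toy Java, not Hive's TopNHash API) of the two-pass shape the vectorized operator above now follows: pass 1 offers only the keys to the top-N structure, and pass 2 spends the value-serialization cost only on rows that survived:

    import java.util.PriorityQueue;

    public class ToyTopNBatch {
      public static void main(String[] args) {
        int n = 3;
        int[] batchKeys = {5, 1, 9, 3, 7};
        // Pass 1: offer every key; retain only the n largest in a min-heap.
        PriorityQueue<Integer> heap = new PriorityQueue<Integer>();
        for (int key : batchKeys) {
          heap.add(key);
          if (heap.size() > n) {
            heap.poll(); // evict the current minimum
          }
        }
        // Pass 2: do the (comparatively expensive) value work only for survivors.
        for (int j = 0; j < batchKeys.length; j++) {
          if (!heap.contains(batchKeys[j])) {
            continue; // dropped by top-n, analogous to TopNHash.EXCLUDED
          }
          System.out.println("emit row " + j + " key=" + batchKeys[j]);
        }
      }
    }

The real operator additionally lets TopNHash signal FORWARD (no limiting at all) up front, and resolves forward-versus-store per row through the hashResult indices.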
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Nov 5 07:01:32 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.lang.reflect.Constructor;
+import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -61,6 +62,12 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToDoubleViaLongToDouble;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -250,7 +257,7 @@ public class VectorizationContext {
ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
} else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
- if (isCustomUDF(expr) || isLegacyPathUDF(expr)) {
+ if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr)) {
ve = getCustomUDFExpression(expr);
} else {
ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
@@ -272,7 +279,7 @@ public class VectorizationContext {
* Depending on performance requirements and frequency of use, these
* may be implemented in the future with an optimized VectorExpression.
*/
- public static boolean isLegacyPathUDF(ExprNodeGenericFuncDesc expr) {
+ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) {
GenericUDF gudf = expr.getGenericUDF();
if (gudf instanceof GenericUDFBridge) {
GenericUDFBridge bridge = (GenericUDFBridge) gudf;
@@ -364,6 +371,21 @@ public class VectorizationContext {
}
}
+ /* Fold simple unary expressions in all members of the input list and return a new
+ * list containing the results.
+ */
+ private List<ExprNodeDesc> foldConstantsForUnaryExprs(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ List<ExprNodeDesc> constantFoldedChildren = new ArrayList<ExprNodeDesc>();
+ if (childExpr != null) {
+ for (ExprNodeDesc expr : childExpr) {
+ expr = this.foldConstantsForUnaryExpression(expr);
+ constantFoldedChildren.add(expr);
+ }
+ }
+ return constantFoldedChildren;
+ }
+
private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDesc, Mode mode)
throws HiveException {
String type = exprDesc.getTypeString();
@@ -533,7 +555,9 @@ public class VectorizationContext {
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
List<ExprNodeDesc> childExpr, Mode mode) throws HiveException {
//First handle special cases
- if (udf instanceof GenericUDFBridge) {
+ if (udf instanceof GenericUDFBetween) {
+ return getBetweenFilterExpression(childExpr);
+ } else if (udf instanceof GenericUDFBridge) {
VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode);
if (v != null) {
return v;
@@ -546,13 +570,7 @@ public class VectorizationContext {
udfClass = ((GenericUDFBridge) udf).getUdfClass();
}
- List<ExprNodeDesc> constantFoldedChildren = new ArrayList<ExprNodeDesc>();
- if (childExpr != null) {
- for (ExprNodeDesc expr : childExpr) {
- expr = this.foldConstantsForUnaryExpression(expr);
- constantFoldedChildren.add(expr);
- }
- }
+ List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode);
if (ve == null) {
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -649,6 +667,60 @@ public class VectorizationContext {
return null;
}
+ /* Get a [NOT] BETWEEN filter expression. This is treated as a special case
+ * because the NOT is actually specified in the expression tree as the first argument,
+ * and we don't want any runtime cost for that. So creating the VectorExpression
+ * needs to be done differently than the standard way where all arguments are
+ * passed to the VectorExpression constructor.
+ */
+ private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+
+ boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
+ ExprNodeDesc colExpr = childExpr.get(1);
+
+ // To hold the left and right boundaries as long values in nanoseconds for the timestamp type.
+ long left, right;
+ List<ExprNodeDesc> newChildren;
+
+ String colType = colExpr.getTypeString();
+
+ // prepare arguments for createVectorExpression
+ List<ExprNodeDesc> childrenAfterNot = foldConstantsForUnaryExprs(childExpr.subList(1, 4));
+
+ // determine class
+ Class<?> cl = null;
+ if (isIntFamily(colType) && !notKeywordPresent) {
+ cl = FilterLongColumnBetween.class;
+ } else if (isIntFamily(colType) && notKeywordPresent) {
+ cl = FilterLongColumnNotBetween.class;
+ } else if (isFloatFamily(colType) && !notKeywordPresent) {
+ cl = FilterDoubleColumnBetween.class;
+ } else if (isFloatFamily(colType) && notKeywordPresent) {
+ cl = FilterDoubleColumnNotBetween.class;
+ } else if (colType.equals("string") && !notKeywordPresent) {
+ cl = FilterStringColumnBetween.class;
+ } else if (colType.equals("string") && notKeywordPresent) {
+ cl = FilterStringColumnNotBetween.class;
+ } else if (colType.equals("timestamp")) {
+
+ // Get timestamp boundary values as longs instead of the expected strings
+ left = getTimestampScalar(childExpr.get(2));
+ right = getTimestampScalar(childExpr.get(3));
+ childrenAfterNot = new ArrayList<ExprNodeDesc>();
+ childrenAfterNot.add(colExpr);
+ childrenAfterNot.add(new ExprNodeConstantDesc(left));
+ childrenAfterNot.add(new ExprNodeConstantDesc(right));
+ if (notKeywordPresent) {
+ cl = FilterLongColumnNotBetween.class;
+ } else {
+ cl = FilterLongColumnBetween.class;
+ }
+ }
+
+ return createVectorExpression(cl, childrenAfterNot, Mode.PROJECTION);
+ }
+
/*
* Return vector expression for a custom (i.e. not built-in) UDF.
*/
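A note on the child layout getBetweenFilterExpression relies on: the NOT flag arrives as a leading boolean constant, so the column and the two bounds are children 1..3. A tiny stand-in (toy objects, not ExprNodeDesc) showing the subList(1, 4) slice:

    import java.util.Arrays;
    import java.util.List;

    public class BetweenChildLayout {
      public static void main(String[] args) {
        // "key NOT BETWEEN 5 AND 10" arrives roughly as:
        List<Object> childExpr = Arrays.<Object>asList(Boolean.TRUE, "col:key", 5, 10);
        boolean notKeywordPresent = (Boolean) childExpr.get(0);
        List<Object> childrenAfterNot = childExpr.subList(1, 4); // [col:key, 5, 10]
        System.out.println(notKeywordPresent + " " + childrenAfterNot);
      }
    }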
@@ -778,6 +850,44 @@ public class VectorizationContext {
}
}
+ // Get a timestamp as a long number of nanoseconds, from a string constant.
+ private long getTimestampScalar(ExprNodeDesc expr) throws HiveException {
+ if (!(expr instanceof ExprNodeConstantDesc)) {
+ throw new HiveException("Constant timestamp value expected for expression argument. " +
+ "Non-constant argument not supported for vectorization.");
+ }
+ ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
+ if (constExpr.getTypeString().equals("string")) {
+
+ // create expression tree with type cast from string to timestamp
+ ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc();
+ GenericUDFTimestamp f = new GenericUDFTimestamp();
+ expr2.setGenericUDF(f);
+ ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(expr);
+ expr2.setChildren(children);
+
+ // initialize and evaluate
+ ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2);
+ ObjectInspector output = evaluator.initialize(null);
+ Object constant = evaluator.evaluate(null);
+ Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
+
+ if (!(java instanceof Timestamp)) {
+ throw new HiveException("Udf: failed to convert from string to timestamp");
+ }
+ Timestamp ts = (Timestamp) java;
+ long result = ts.getTime();
+ result *= 1000000; // shift left 6 digits to make room for nanos below ms precision
+ result += ts.getNanos() % 1000000; // add in nanos, after removing the ms portion
+ return result;
+ }
+
+ throw new HiveException("Udf: unhandled constant type for scalar argument. "
+ + "Expecting string.");
+ }
+
+
private Constructor<?> getConstructor(Class<?> cl) throws HiveException {
try {
Constructor<?> [] ctors = cl.getDeclaredConstructors();
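The millisecond-to-nanosecond arithmetic in getTimestampScalar can be checked in isolation; a standalone sketch using the same formula on a plain java.sql.Timestamp:

    import java.sql.Timestamp;

    public class TimestampNanosCheck {
      public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2013-11-05 07:01:32.123456789");
        // getTime() already includes the millisecond part of the nanos, so
        // widen it to nanos and add back only the sub-millisecond remainder.
        long nanos = ts.getTime() * 1000000L + ts.getNanos() % 1000000L;
        System.out.println(nanos); // ends in ...123456789
      }
    }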
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java Tue Nov 5 07:01:32 2013
@@ -27,8 +27,8 @@ import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.Explain;
@@ -152,7 +152,7 @@ public class MergeWork extends MapWork i
Path dirPath = new Path(dirName);
try {
FileSystem inpFs = dirPath.getFileSystem(conf);
- FileStatus[] status = Utilities.getFileStatusRecurse(dirPath, listBucketingCtx
+ FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, listBucketingCtx
.getSkewedColNames().size(), inpFs);
List<String> newInputPath = new ArrayList<String>();
boolean succeed = true;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java Tue Nov 5 07:01:32 2013
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.ErrorMsg;
@@ -33,7 +34,6 @@ import org.apache.hadoop.hive.ql.io.rcfi
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
-import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
import org.apache.hadoop.hive.shims.CombineHiveKey;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Nov 5 07:01:32 2013
@@ -48,6 +48,10 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
+import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaException;
@@ -1365,7 +1369,7 @@ private void constructOneLBLocationMap(F
new ArrayList<LinkedHashMap<String, String>>();
FileSystem fs = loadPath.getFileSystem(conf);
- FileStatus[] leafStatus = Utilities.getFileStatusRecurse(loadPath, numDP+1, fs);
+ FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP+1, fs);
// Check for empty partitions
for (FileStatus s : leafStatus) {
// Check if the hadoop version supports sub-directories for tables/partitions
@@ -1558,6 +1562,17 @@ private void constructOneLBLocationMap(F
return getPartition(tbl, partSpec, forceCreate, null, true);
}
+ private static void clearPartitionStats(org.apache.hadoop.hive.metastore.api.Partition tpart) {
+ Map<String,String> tpartParams = tpart.getParameters();
+ if (tpartParams == null) {
+ return;
+ }
+ List<String> statTypes = StatsSetupConst.getSupportedStats();
+ for (String statType : statTypes) {
+ tpartParams.remove(statType);
+ }
+ }
+
/**
* Returns partition metadata
*
@@ -1627,6 +1642,7 @@ private void constructOneLBLocationMap(F
throw new HiveException("new partition path should not be null or empty.");
}
tpart.getSd().setLocation(partPath);
+ clearPartitionStats(tpart);
String fullName = tbl.getTableName();
if (!org.apache.commons.lang.StringUtils.isEmpty(tbl.getDbName())) {
fullName = tbl.getDbName() + "." + tbl.getTableName();
@@ -1730,7 +1746,7 @@ private void constructOneLBLocationMap(F
* @param tbl table for which partitions are needed
* @return list of partition objects
*/
- public Set<Partition> getAllPartitionsForPruner(Table tbl) throws HiveException {
+ public Set<Partition> getAllPartitionsOf(Table tbl) throws HiveException {
if (!tbl.isPartitioned()) {
return Sets.newHashSet(new Partition(tbl));
}
@@ -2405,21 +2421,13 @@ private void constructOneLBLocationMap(F
HiveMetaStoreClient.class.getName());
}
- /*
- * This api just sets up a metastore client. This is used for
- * pre-launching the metastore client so as to reduce latency
- * within a single session.
- */
- public void setupMSC() throws MetaException {
- getMSC();
- }
-
/**
- *
* @return the metastore client for the current thread
* @throws MetaException
*/
- private IMetaStoreClient getMSC() throws MetaException {
+ @LimitedPrivate(value = {"Hive"})
+ @Unstable
+ public IMetaStoreClient getMSC() throws MetaException {
if (metaStoreClient == null) {
metaStoreClient = createMetaStoreClient();
}
@@ -2565,4 +2573,5 @@ private void constructOneLBLocationMap(F
private static String[] getQualifiedNames(String qualifiedName) {
return qualifiedName.split("\\.");
}
+
};
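The new clearPartitionStats above drops only the statistics entries from a partition's parameter map when its location changes, since they would otherwise describe data that is no longer there. A toy run of the same idea, with illustrative key names standing in for StatsSetupConst.getSupportedStats():

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class ClearStatsSketch {
      public static void main(String[] args) {
        // Illustrative stat keys; the authoritative list comes from StatsSetupConst.
        List<String> statTypes = Arrays.asList("numRows", "rawDataSize", "totalSize", "numFiles");
        Map<String, String> tpartParams = new HashMap<String, String>();
        tpartParams.put("numRows", "1000");
        tpartParams.put("transient_lastDdlTime", "1383634892");
        for (String statType : statTypes) {
          tpartParams.remove(statType); // stale once the partition points elsewhere
        }
        System.out.println(tpartParams); // non-stat parameters survive
      }
    }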
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Tue Nov 5 07:01:32 2013
@@ -111,6 +111,9 @@ public class Optimizer {
if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) {
transformations.add(new LimitPushdownOptimizer());
}
+ if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) {
+ transformations.add(new StatsOptimizer());
+ }
transformations.add(new SimpleFetchOptimizer()); // must be called last
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) {
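(HIVEOPTIMIZEMETADATAQUERIES presumably corresponds to the hive.compute.query.using.stats setting; with it enabled, the new StatsOptimizer can answer purely metadata-backed queries such as "select count(*) from t" directly from statistics. The FetchWork changes further down carry the precomputed rows for that path.)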
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Nov 5 07:01:32 2013
@@ -33,18 +33,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -71,6 +60,7 @@ import org.apache.hadoop.hive.ql.plan.ap
import org.apache.hadoop.hive.ql.udf.*;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
@@ -182,6 +172,7 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(GenericUDFUpper.class);
supportedGenericUDFs.add(GenericUDFConcat.class);
supportedGenericUDFs.add(GenericUDFAbs.class);
+ supportedGenericUDFs.add(GenericUDFBetween.class);
// For type casts
supportedGenericUDFs.add(UDFToLong.class);
@@ -449,7 +440,9 @@ public class Vectorizer implements Physi
boolean ret = false;
switch (op.getType()) {
case MAPJOIN:
- ret = validateMapJoinOperator((MapJoinOperator) op);
+ if (op instanceof MapJoinOperator) {
+ ret = validateMapJoinOperator((MapJoinOperator) op);
+ }
break;
case GROUPBY:
ret = validateGroupByOperator((GroupByOperator) op);
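The MAPJOIN case now guards the cast with instanceof: op.getType() alone evidently is not enough to guarantee a MapJoinOperator here (other operator classes can report the MAPJOIN type), so a non-matching operator now simply fails validation instead of throwing a ClassCastException.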
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Tue Nov 5 07:01:32 2013
@@ -337,7 +337,7 @@ public class PartitionPruner implements
private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
PerfLogger perfLogger = PerfLogger.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
- Set<Partition> result = Hive.get().getAllPartitionsForPruner(tab);
+ Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
return result;
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java Tue Nov 5 07:01:32 2013
@@ -75,11 +75,11 @@ public class EximUtil {
String scheme = uri.getScheme();
String authority = uri.getAuthority();
String path = uri.getPath();
- LOG.debug("Path before norm :" + path);
+ LOG.info("Path before norm :" + path);
// generate absolute path relative to home directory
if (!path.startsWith("/")) {
if (testMode) {
- path = (new Path(System.getProperty("build.dir.hive"),
+ path = (new Path(System.getProperty("test.tmp.dir"),
path)).toUri().getPath();
} else {
path = (new Path(new Path("/user/" + System.getProperty("user.name")),
@@ -102,7 +102,7 @@ public class EximUtil {
authority = defaultURI.getAuthority();
}
- LOG.debug("Scheme:" + scheme + ", authority:" + authority + ", path:" + path);
+ LOG.info("Scheme:" + scheme + ", authority:" + authority + ", path:" + path);
Collection<String> eximSchemes = conf.getStringCollection(
HiveConf.ConfVars.HIVE_EXIM_URI_SCHEME_WL.varname);
if (!eximSchemes.contains(scheme)) {
@@ -144,7 +144,7 @@ public class EximUtil {
String authority = uri.getAuthority();
String path = uri.getPath();
if (!path.startsWith("/")) {
- path = (new Path(System.getProperty("build.dir.hive"),
+ path = (new Path(System.getProperty("test.tmp.dir"),
path)).toUri().getPath();
}
if (StringUtils.isEmpty(scheme)) {
@@ -293,12 +293,12 @@ public class EximUtil {
/**
* Return the partition specification from the specified keys and values
- *
+ *
* @param partCols
* the names of the partition keys
* @param partVals
* the values of the partition keys
- *
+ *
* @return the partition specification as a map
*/
public static Map<String, String> makePartSpec(List<FieldSchema> partCols, List<String> partVals) {
@@ -311,7 +311,7 @@ public class EximUtil {
/**
* Compares the schemas - names, types and order, but ignoring comments
- *
+ *
* @param newSchema
* the new schema
* @param oldSchema
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Tue Nov 5 07:01:32 2013
@@ -65,6 +65,11 @@ public class QB {
*/
private HashMap<String, WindowingSpec> destToWindowingSpec;
+ /*
+ * If this QB represents a SubQuery predicate then this will point to the SubQuery object.
+ */
+ private QBSubQuery subQueryPredicateDef;
+
// results
public void print(String msg) {
@@ -308,5 +313,12 @@ public class QB {
return destToWindowingSpec;
}
+ protected void setSubQueryDef(QBSubQuery subQueryPredicateDef) {
+ this.subQueryPredicateDef = subQueryPredicateDef;
+ }
+
+ protected QBSubQuery getSubQueryPredicateDef() {
+ return subQueryPredicateDef;
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java Tue Nov 5 07:01:32 2013
@@ -111,7 +111,11 @@ public class QBJoinTree implements Seria
* String
*/
public void setLeftAlias(String leftAlias) {
- this.leftAlias = leftAlias;
+ if ( this.leftAlias != null && !this.leftAlias.equals(leftAlias) ) {
+ this.leftAlias = null;
+ } else {
+ this.leftAlias = leftAlias;
+ }
}
public String[] getRightAliases() {
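The revised setLeftAlias makes a conflicting second assignment reset the alias to null rather than overwrite it; findMergePos in SemanticAnalyzer (below) treats a null leftAlias as "no merge possible", so a join tree whose left side resolves to more than one alias simply opts out of merging.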
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Nov 5 07:01:32 2013
@@ -1379,7 +1379,7 @@ public class SemanticAnalyzer extends Ba
}
@SuppressWarnings("nls")
- private void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
+ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
ArrayList<String> leftAliases, ArrayList<String> rightAliases,
ArrayList<String> fields) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
@@ -1501,6 +1501,160 @@ public class SemanticAnalyzer extends Ba
}
}
+ /*
+ * refactored out of the Equality case of parseJoinCondition
+ * so that this can be recursively called on its left tree in the case when
+ * only left sources are referenced in a Predicate
+ */
+ void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree,
+ JoinType type,
+ List<String> leftSrc,
+ ASTNode joinCond,
+ ASTNode leftCondn,
+ ASTNode rightCondn,
+ List<String> leftCondAl1,
+ List<String> leftCondAl2,
+ List<String> rightCondAl1,
+ List<String> rightCondAl2) throws SemanticException {
+ if (leftCondAl1.size() != 0) {
+ if ((rightCondAl1.size() != 0)
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
+ if (type.equals(JoinType.LEFTOUTER) ||
+ type.equals(JoinType.FULLOUTER)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
+ joinTree.getFilters().get(0).add(joinCond);
+ } else {
+ LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else {
+ /*
+ * If the rhs references table sources and this QBJoinTree has a leftTree,
+ * hand it to the leftTree and let it recursively handle it.
+ * There are 5 cases to consider when passing a condition down:
+ * 1. The leftSide && rightSide don't contain references to the leftTree's rightAlias
+ * => pass the lists down as is.
+ * 2. The leftSide contains refs to the leftTree's rightAlias, the rightSide doesn't
+ * => switch the leftCondAl1 and leftCondAl2 lists and pass down.
+ * 3. The rightSide contains refs to the leftTree's rightAlias, the leftSide doesn't
+ * => switch the rightCondAl1 and rightCondAl2 lists and pass down.
+ * 4. If both contain references to the leftTree's rightAlias
+ * => we cannot push the condition down.
+ * 5. If either contains references to both left & right
+ * => we cannot push forward.
+ */
+ if (rightCondAl1.size() != 0) {
+ QBJoinTree leftTree = joinTree.getJoinSrc();
+ List<String> leftTreeLeftSrc = new ArrayList<String>();
+ if (leftTree != null) {
+ String leftTreeRightSource = leftTree.getRightAliases() != null &&
+ leftTree.getRightAliases().length > 0 ?
+ leftTree.getRightAliases()[0] : null;
+
+ boolean leftHasRightReference = false;
+ for (String r : leftCondAl1) {
+ if (r.equals(leftTreeRightSource)) {
+ leftHasRightReference = true;
+ break;
+ }
+ }
+ boolean rightHasRightReference = false;
+ for (String r : rightCondAl1) {
+ if (r.equals(leftTreeRightSource)) {
+ rightHasRightReference = true;
+ break;
+ }
+ }
+
+ boolean pushedDown = false;
+ if ( !leftHasRightReference && !rightHasRightReference ) {
+ applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl1, rightCondAl2);
+ pushedDown = true;
+ } else if ( !leftHasRightReference && rightHasRightReference && rightCondAl1.size() == 1 ) {
+ applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl2, rightCondAl1);
+ pushedDown = true;
+ } else if (leftHasRightReference && !rightHasRightReference && leftCondAl1.size() == 1 ) {
+ applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl2, leftCondAl1,
+ rightCondAl1, rightCondAl2);
+ pushedDown = true;
+ }
+
+ if (leftTreeLeftSrc.size() == 1) {
+ leftTree.setLeftAlias(leftTreeLeftSrc.get(0));
+ }
+ if ( pushedDown) {
+ return;
+ }
+ } // leftTree != null
+ }
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else if (rightCondAl2.size() != 0) {
+ populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
+ leftSrc);
+ populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
+ leftSrc);
+ boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
+ joinTree.getNullSafes().add(nullsafe);
+ }
+ } else if (leftCondAl2.size() != 0) {
+ if ((rightCondAl2.size() != 0)
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
+ if (type.equals(JoinType.RIGHTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
+ joinTree.getFilters().get(1).add(joinCond);
+ } else {
+ LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ } else {
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ } else if (rightCondAl1.size() != 0) {
+ populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
+ leftSrc);
+ populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
+ leftSrc);
+ boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
+ joinTree.getNullSafes().add(nullsafe);
+ }
+ } else if (rightCondAl1.size() != 0) {
+ if (type.equals(JoinType.LEFTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
+ joinTree.getFilters().get(0).add(joinCond);
+ } else {
+ LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else {
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else {
+ if (type.equals(JoinType.RIGHTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
+ joinTree.getFilters().get(1).add(joinCond);
+ } else {
+ LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ } else {
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ }
+
+ }
+
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List<String> leftSrc)
throws SemanticException {
if (joinCond == null) {
@@ -1581,75 +1735,10 @@ public class SemanticAnalyzer extends Ba
.getMsg(joinCond));
}
- if (leftCondAl1.size() != 0) {
- if ((rightCondAl1.size() != 0)
- || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
- if (type.equals(JoinType.LEFTOUTER) ||
- type.equals(JoinType.FULLOUTER)) {
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
- joinTree.getFilters().get(0).add(joinCond);
- } else {
- LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
- joinTree.getFiltersForPushing().get(0).add(joinCond);
- }
- } else {
- joinTree.getFiltersForPushing().get(0).add(joinCond);
- }
- } else if (rightCondAl2.size() != 0) {
- populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
- leftSrc);
- populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
- leftSrc);
- boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
- joinTree.getNullSafes().add(nullsafe);
- }
- } else if (leftCondAl2.size() != 0) {
- if ((rightCondAl2.size() != 0)
- || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
- if (type.equals(JoinType.RIGHTOUTER)
- || type.equals(JoinType.FULLOUTER)) {
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
- joinTree.getFilters().get(1).add(joinCond);
- } else {
- LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
- joinTree.getFiltersForPushing().get(1).add(joinCond);
- }
- } else {
- joinTree.getFiltersForPushing().get(1).add(joinCond);
- }
- } else if (rightCondAl1.size() != 0) {
- populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
- leftSrc);
- populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
- leftSrc);
- boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
- joinTree.getNullSafes().add(nullsafe);
- }
- } else if (rightCondAl1.size() != 0) {
- if (type.equals(JoinType.LEFTOUTER)
- || type.equals(JoinType.FULLOUTER)) {
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
- joinTree.getFilters().get(0).add(joinCond);
- } else {
- LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
- joinTree.getFiltersForPushing().get(0).add(joinCond);
- }
- } else {
- joinTree.getFiltersForPushing().get(0).add(joinCond);
- }
- } else {
- if (type.equals(JoinType.RIGHTOUTER)
- || type.equals(JoinType.FULLOUTER)) {
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
- joinTree.getFilters().get(1).add(joinCond);
- } else {
- LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
- joinTree.getFiltersForPushing().get(1).add(joinCond);
- }
- } else {
- joinTree.getFiltersForPushing().get(1).add(joinCond);
- }
- }
+ applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl1, rightCondAl2);
break;
@@ -1792,7 +1881,15 @@ public class SemanticAnalyzer extends Ba
ASTNode searchCond = (ASTNode) whereExpr.getChild(0);
List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
- if ( subQueriesInOriginalTree != null ) {
+ if ( subQueriesInOriginalTree.size() > 0 ) {
+
+ /*
+ * Restriction.9.m :: disallow nested SubQuery expressions.
+ */
+ if (qb.getSubQueryPredicateDef() != null ) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
+ }
/*
* Restriction.8.m :: We allow only 1 SubQuery expression per Query.
@@ -1822,6 +1919,7 @@ public class SemanticAnalyzer extends Ba
subQuery.validateAndRewriteAST(inputRR);
QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
+ qbSQ.setSubQueryDef(subQuery);
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
getMetaData(qbSQ);
@@ -6839,7 +6937,7 @@ public class SemanticAnalyzer extends Ba
/**
* Merges node to target
*/
- private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos) {
+ private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[] tgtToNodeExprMap) {
String[] nodeRightAliases = node.getRightAliases();
String[] trgtRightAliases = target.getRightAliases();
String[] rightAliases = new String[nodeRightAliases.length
@@ -6868,7 +6966,12 @@ public class SemanticAnalyzer extends Ba
ArrayList<ArrayList<ASTNode>> expr = target.getExpressions();
for (int i = 0; i < nodeRightAliases.length; i++) {
- expr.add(node.getExpressions().get(i + 1));
+ List<ASTNode> nodeConds = node.getExpressions().get(i + 1);
+ ArrayList<ASTNode> reorderedNodeConds = new ArrayList<ASTNode>();
+ for(int k=0; k < tgtToNodeExprMap.length; k++) {
+ reorderedNodeConds.add(nodeConds.get(tgtToNodeExprMap[k]));
+ }
+ expr.add(reorderedNodeConds);
}
ArrayList<Boolean> nns = node.getNullSafes();
@@ -6985,11 +7088,11 @@ public class SemanticAnalyzer extends Ba
}
}
- private int findMergePos(QBJoinTree node, QBJoinTree target) {
+ private ObjectPair<Integer, int[]> findMergePos(QBJoinTree node, QBJoinTree target) {
int res = -1;
String leftAlias = node.getLeftAlias();
if (leftAlias == null) {
- return -1;
+ return new ObjectPair(-1, null);
}
ArrayList<ASTNode> nodeCondn = node.getExpressions().get(0);
@@ -7008,18 +7111,41 @@ public class SemanticAnalyzer extends Ba
}
}
- if ((targetCondn == null) || (nodeCondn.size() != targetCondn.size())) {
- return -1;
+ if ( targetCondn == null ) {
+ return new ObjectPair(-1, null);
+ }
+
+ /*
+ * The order of the join condition expressions doesn't matter.
+ * A merge can happen:
+ * - if every target condition is present at some position in the node condition list, and
+ * - if no node condition is left unmatched by a target condition.
+ */
+
+ int[] tgtToNodeExprMap = new int[targetCondn.size()];
+ boolean[] nodeFiltersMapped = new boolean[nodeCondn.size()];
+ int i, j;
+ for(i=0; i<targetCondn.size(); i++) {
+ String tgtExprTree = targetCondn.get(i).toStringTree();
+ tgtToNodeExprMap[i] = -1;
+ for(j=0; j < nodeCondn.size(); j++) {
+ if ( nodeCondn.get(j).toStringTree().equals(tgtExprTree)) {
+ tgtToNodeExprMap[i] = j;
+ nodeFiltersMapped[j] = true;
+ }
+ }
+ if ( tgtToNodeExprMap[i] == -1) {
+ return new ObjectPair(-1, null);
+ }
}
- for (int i = 0; i < nodeCondn.size(); i++) {
- if (!nodeCondn.get(i).toStringTree().equals(
- targetCondn.get(i).toStringTree())) {
- return -1;
+ for(j=0; j < nodeCondn.size(); j++) {
+ if ( !nodeFiltersMapped[j]) {
+ return new ObjectPair(-1, null);
}
}
- return res;
+ return new ObjectPair(res, tgtToNodeExprMap);
}
// try merge join tree from inner most source
@@ -7054,7 +7180,8 @@ public class SemanticAnalyzer extends Ba
if (prevType != null && prevType != currType) {
break;
}
- int pos = findMergePos(node, target);
+ ObjectPair<Integer, int[]> mergeDetails = findMergePos(node, target);
+ int pos = mergeDetails.getFirst();
if (pos >= 0) {
// for outer joins, it should not exceed 16 aliases (short type)
if (!node.getNoOuterJoin() || !target.getNoOuterJoin()) {
@@ -7063,7 +7190,7 @@ public class SemanticAnalyzer extends Ba
continue;
}
}
- mergeJoins(qb, node, target, pos);
+ mergeJoins(qb, node, target, pos, mergeDetails.getSecond());
trees.set(j, null);
continue; // continue merging with next alias
}
@@ -7993,7 +8120,7 @@ public class SemanticAnalyzer extends Ba
}
private String getAliasId(String alias, QB qb) {
- return (qb.getId() == null ? alias : qb.getId() + ":" + alias);
+ return (qb.getId() == null ? alias : qb.getId() + ":" + alias).toLowerCase();
}
@SuppressWarnings("nls")
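The order-insensitive merge test introduced in findMergePos is worth seeing on its own: each target condition must match some node condition (recording where), and no node condition may be left unmatched; the resulting index map is what mergeJoins uses to reorder the node's expression lists. A self-contained sketch with strings standing in for ASTNode string trees:

    import java.util.Arrays;

    public class MergeMatchSketch {
      // Returns the tgtToNodeExprMap, or null when no merge is possible.
      static int[] match(String[] targetCondn, String[] nodeCondn) {
        int[] tgtToNodeExprMap = new int[targetCondn.length];
        boolean[] nodeMapped = new boolean[nodeCondn.length];
        for (int i = 0; i < targetCondn.length; i++) {
          tgtToNodeExprMap[i] = -1;
          for (int j = 0; j < nodeCondn.length; j++) {
            if (nodeCondn[j].equals(targetCondn[i])) {
              tgtToNodeExprMap[i] = j;
              nodeMapped[j] = true;
            }
          }
          if (tgtToNodeExprMap[i] == -1) {
            return null; // some target condition has no counterpart
          }
        }
        for (boolean mapped : nodeMapped) {
          if (!mapped) {
            return null; // some node condition matches no target condition
          }
        }
        return tgtToNodeExprMap;
      }

      public static void main(String[] args) {
        String[] target = {"(= a.x b.x)", "(= a.y b.y)"};
        String[] node = {"(= a.y b.y)", "(= a.x b.x)"};
        System.out.println(Arrays.toString(match(target, node))); // [1, 0]
        System.out.println(match(target, new String[]{"(= a.x b.x)", "(= a.z b.z)"})); // null
      }
    }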
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java Tue Nov 5 07:01:32 2013
@@ -4,8 +4,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import org.antlr.runtime.tree.TreeWizard;
-import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
@@ -173,31 +171,22 @@ public class SubQueryUtils {
static List<ASTNode> findSubQueries(ASTNode node)
throws SemanticException {
- TreeWizard tw = new TreeWizard(ParseDriver.adaptor, HiveParser.tokenNames);
- SubQueryVisitor visitor = new SubQueryVisitor();
- tw.visit(node, HiveParser.TOK_SUBQUERY_EXPR, visitor);
- return visitor.getSubQueries();
- }
-
- static class SubQueryVisitor implements ContextVisitor {
- String errMsg;
- boolean throwError = false;
- ASTNode errorNode;
- List<ASTNode> subQueries;
-
- @SuppressWarnings("rawtypes")
- @Override
- public void visit(Object t, Object parent, int childIndex, Map labels) {
- if (subQueries == null ) {
- subQueries = new ArrayList<ASTNode>();
- }
- subQueries.add((ASTNode)t);
- }
+ List<ASTNode> subQueries = new ArrayList<ASTNode>();
+ findSubQueries(node, subQueries);
+ return subQueries;
+ }
- public List<ASTNode> getSubQueries() {
- return subQueries;
+ private static void findSubQueries(ASTNode node, List<ASTNode> subQueries) {
+ switch(node.getType()) {
+ case HiveParser.TOK_SUBQUERY_EXPR:
+ subQueries.add(node);
+ break;
+ default:
+ int childCount = node.getChildCount();
+ for(int i=0; i < childCount; i++) {
+ findSubQueries((ASTNode) node.getChild(i), subQueries);
+ }
}
-
}
static QBSubQuery buildSubQuery(String outerQueryId,
@@ -208,6 +197,16 @@ public class SubQueryUtils {
ASTNode sqOp = (ASTNode) sqAST.getChild(0);
ASTNode sq = (ASTNode) sqAST.getChild(1);
ASTNode outerQueryExpr = (ASTNode) sqAST.getChild(2);
+
+ /*
+ * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
+ */
+ if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR ) {
+
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ originalSQAST.getChild(1), "Only 1 SubQuery expression is supported."));
+ }
+
return new QBSubQuery(outerQueryId, sqIdx, sq, outerQueryExpr,
buildSQOperator(sqOp),
originalSQAST,
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Tue Nov 5 07:01:32 2013
@@ -28,10 +28,10 @@ import java.util.Map;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.Utilities;
/**
* Conditional task resolution interface. This is invoked at run time to get the
@@ -231,7 +231,7 @@ public class ConditionalResolverMergeFil
throws IOException {
DynamicPartitionCtx dpCtx = ctx.getDPCtx();
// get list of dynamic partitions
- FileStatus[] status = Utilities.getFileStatusRecurse(dirPath, dpLbLevel, inpFs);
+ FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, dpLbLevel, inpFs);
// cleanup pathToPartitionInfo
Map<String, PartitionDesc> ptpi = work.getPathToPartitionInfo();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java Tue Nov 5 07:01:32 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.parse.SplitSample;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
* FetchWork.
@@ -50,6 +51,9 @@ public class FetchWork implements Serial
private SplitSample splitSample;
+ private transient List<List<Object>> rowsComputedFromStats;
+ private transient ObjectInspector statRowOI;
+
/**
* Serialization Null Format for the serde used to fetch data.
*/
@@ -58,6 +62,19 @@ public class FetchWork implements Serial
public FetchWork() {
}
+ public FetchWork(List<List<Object>> rowsComputedFromStats,ObjectInspector statRowOI) {
+ this.rowsComputedFromStats = rowsComputedFromStats;
+ this.statRowOI = statRowOI;
+ }
+
+ public ObjectInspector getStatRowOI() {
+ return statRowOI;
+ }
+
+ public List<List<Object>> getRowsComputedUsingStats() {
+ return rowsComputedFromStats;
+ }
+
public FetchWork(String tblDir, TableDesc tblDesc) {
this(tblDir, tblDesc, -1);
}
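The two transient fields added to FetchWork are the hand-off point for the StatsOptimizer path mentioned above: when a query can be answered entirely from statistics, the optimizer builds the result rows up front and wraps them in a FetchWork, so no job runs. A toy of the shape of that hand-off (plain lists, not the real ObjectInspector plumbing):

    import java.util.Collections;
    import java.util.List;

    public class StatsFetchSketch {
      public static void main(String[] args) {
        // e.g. the single row answering "select count(*) from t"
        List<List<Object>> rowsComputedFromStats =
            Collections.singletonList(Collections.<Object>singletonList(1000L));
        for (List<Object> row : rowsComputedFromStats) {
          System.out.println(row); // [1000]
        }
      }
    }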
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java Tue Nov 5 07:01:32 2013
@@ -143,12 +143,21 @@ public class SetProcessor implements Com
String value = new VariableSubstitution().substitute(conf, varvalue);
if (conf.getBoolVar(HiveConf.ConfVars.HIVECONFVALIDATION)) {
HiveConf.ConfVars confVars = HiveConf.getConfVars(key);
- if (confVars != null && !confVars.isType(value)) {
- StringBuilder message = new StringBuilder();
- message.append("'SET ").append(varname).append('=').append(varvalue);
- message.append("' FAILED because "); message.append(key).append(" expects an ");
- message.append(confVars.typeString()).append(" value.");
- throw new IllegalArgumentException(message.toString());
+ if (confVars != null) {
+ if (!confVars.isType(value)) {
+ StringBuilder message = new StringBuilder();
+ message.append("'SET ").append(varname).append('=').append(varvalue);
+ message.append("' FAILED because ").append(key).append(" expects an ");
+ message.append(confVars.typeString()).append(" value.");
+ throw new IllegalArgumentException(message.toString());
+ }
+ String fail = confVars.validate(value);
+ if (fail != null) {
+ StringBuilder message = new StringBuilder();
+ message.append("'SET ").append(varname).append('=').append(varvalue);
+ message.append("' FAILED in validation: ").append(fail).append('.');
+ throw new IllegalArgumentException(message.toString());
+ }
}
}
conf.verifyAndSet(key, value);
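Net effect of the rewritten block: the type check is unchanged, and a second gate now runs confVars.validate(value), which evidently returns null on success and a failure description otherwise. From a client session the two failure modes would look roughly like this (variable names hypothetical, message text per the code above):

// set hive.some.int.var=abc;          (hypothetical int-typed variable)
//   -> 'SET hive.some.int.var=abc' FAILED because hive.some.int.var expects an <type> value.
// set hive.some.bounded.var=999;      (hypothetical variable with a validator)
//   -> 'SET hive.some.bounded.var=999' FAILED in validation: <reason returned by validate()>.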
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Tue Nov 5 07:01:32 2013
@@ -33,8 +33,6 @@ import java.util.Map;
import java.util.Set;
import java.util.UUID;
-import javax.security.auth.login.LoginException;
-
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -255,6 +253,13 @@ public class SessionState {
}
/**
+ * Sets the given session state in the thread local var for sessions.
+ */
+ public static void setCurrentSessionState(SessionState session) {
+ tss.set(session);
+ }
+
+ /**
* set current session to existing session object if a thread is running
* multiple sessions - it must call this method with the new session object
* when switching from one session to another.
@@ -262,7 +267,7 @@ public class SessionState {
*/
public static SessionState start(SessionState startSs) {
- tss.set(startSs);
+ setCurrentSessionState(startSs);
if(startSs.hiveHist == null){
if (startSs.getConf().getBoolVar(HiveConf.ConfVars.HIVE_SESSION_HISTORY_ENABLED)) {
@@ -285,7 +290,7 @@ public class SessionState {
// Get the following out of the way when you start the session; these take a
// while and should be done when we start up.
try {
- Hive.get(startSs.conf).setupMSC();
+ Hive.get(startSs.conf).getMSC();
ShimLoader.getHadoopShims().getUGIForConf(startSs.conf);
FileSystem.get(startSs.conf);
} catch (Exception e) {
@@ -293,7 +298,7 @@ public class SessionState {
// that would cause ClassNotFoundException otherwise
throw new RuntimeException(e);
}
-
+
try {
startSs.authenticator = HiveUtils.getAuthenticator(
startSs.getConf(),HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER);
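The new setCurrentSessionState simply factors the thread-local assignment out of start(): start() still does the heavyweight warm-up (forcing metastore client creation, now via Hive.get(conf).getMSC() instead of setupMSC(), plus UGI and FileSystem), while a thread that services an already-started session only needs the cheap rebind. A minimal sketch, with the threading scaffolding assumed rather than taken from this patch (conf is a HiveConf):

// Start once, on the main thread (expensive warm-up).
final SessionState ss = SessionState.start(new SessionState(conf));

// Rebind cheaply on each worker thread that serves this session.
new Thread(new Runnable() {
  public void run() {
    SessionState.setCurrentSessionState(ss);
    // ... run work on behalf of ss ...
  }
}).start();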
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java Tue Nov 5 07:01:32 2013
@@ -24,6 +24,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.util.ReflectionUtils;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsSetupConst.java Tue Nov 5 07:01:32 2013
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.stats;
-
-/**
- * A class that defines the constant strings used by the statistics implementation.
- */
-
-public class StatsSetupConst {
-
- /**
- * The value of the user variable "hive.stats.dbclass" to use HBase implementation.
- */
- public static final String HBASE_IMPL_CLASS_VAL = "hbase";
-
- /**
- * The value of the user variable "hive.stats.dbclass" to use JDBC implementation.
- */
- public static final String JDBC_IMPL_CLASS_VAL = "jdbc";
-
- /**
- * The name of the statistic Num Files to be published or gathered.
- */
- public static final String NUM_FILES = "numFiles";
-
- /**
- * The name of the statistic Num Partitions to be published or gathered.
- */
- public static final String NUM_PARTITIONS = "numPartitions";
-
- /**
- * The name of the statistic Total Size to be published or gathered.
- */
- public static final String TOTAL_SIZE = "totalSize";
-
-
- // statistics stored in metastore
-
- /**
- * The name of the statistic Row Count to be published or gathered.
- */
- public static final String ROW_COUNT = "numRows";
-
- /**
- * The name of the statistic Raw Data Size to be published or gathered.
- */
- public static final String RAW_DATA_SIZE = "rawDataSize";
-
-}
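Deleted here, but not gone: the import swaps in StatsFactory.java above and JDBCStatsUtils.java below (org.apache.hadoop.hive.ql.stats.StatsSetupConst -> org.apache.hadoop.hive.common.StatsSetupConst) show the class has simply moved to the common module, presumably so code outside ql can share the stat-key constants (numFiles, numRows, rawDataSize, ...).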
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java Tue Nov 5 07:01:32 2013
@@ -23,7 +23,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
+import org.apache.hadoop.hive.common.StatsSetupConst;
public class JDBCStatsUtils {
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Tue Nov 5 07:01:32 2013
@@ -69,10 +69,9 @@ public class TestExecDriver extends Test
static HiveConf conf;
- private static String tmpdir = System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name")
- + File.separator;
- private static Log LOG = LogFactory.getLog(TestExecDriver.class);
- private static Path tmppath = new Path(tmpdir);
+ private static final String tmpdir = System.getProperty("test.tmp.dir");
+ private static final Log LOG = LogFactory.getLog(TestExecDriver.class);
+ private static final Path tmppath = new Path(tmpdir);
private static Hive db;
private static FileSystem fs;
@@ -80,7 +79,7 @@ public class TestExecDriver extends Test
try {
conf = new HiveConf(ExecDriver.class);
SessionState.start(conf);
-
+
fs = FileSystem.get(conf);
if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDir()) {
throw new RuntimeException(tmpdir + " exists but is not a directory");
@@ -155,10 +154,10 @@ public class TestExecDriver extends Test
// inbuilt assumption that the testdir has only one output file.
Path di_test = new Path(tmppath, testdir);
if (!fs.exists(di_test)) {
- throw new RuntimeException(tmpdir + testdir + " does not exist");
+ throw new RuntimeException(tmpdir + File.separator + testdir + " does not exist");
}
if (!fs.getFileStatus(di_test).isDir()) {
- throw new RuntimeException(tmpdir + testdir + " is not a directory");
+ throw new RuntimeException(tmpdir + File.separator + testdir + " is not a directory");
}
FSDataInputStream fi_test = fs.open((fs.listStatus(di_test))[0].getPath());
@@ -197,7 +196,7 @@ public class TestExecDriver extends Test
@SuppressWarnings("unchecked")
private void populateMapPlan1(Table src) {
- Operator<FileSinkDesc> op2 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op2 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapplan1.out", Utilities.defaultTd, true));
Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"),
op2);
@@ -208,7 +207,7 @@ public class TestExecDriver extends Test
@SuppressWarnings("unchecked")
private void populateMapPlan2(Table src) {
- Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapplan2.out", Utilities.defaultTd, false));
Operator<ScriptDesc> op2 = OperatorFactory.get(new ScriptDesc("cat",
@@ -244,7 +243,7 @@ public class TestExecDriver extends Test
mr.setReduceWork(rWork);
// reduce side work
- Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapredplan1.out", Utilities.defaultTd, false));
Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
@@ -274,7 +273,7 @@ public class TestExecDriver extends Test
mr.setReduceWork(rWork);
// reduce side work
- Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapredplan2.out", Utilities.defaultTd, false));
Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
@@ -318,7 +317,7 @@ public class TestExecDriver extends Test
rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());
// reduce side work
- Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapredplan3.out", Utilities.defaultTd, false));
Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(Utilities
@@ -361,7 +360,7 @@ public class TestExecDriver extends Test
mr.setReduceWork(rWork);
// reduce side work
- Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapredplan4.out", Utilities.defaultTd, false));
Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
@@ -400,7 +399,7 @@ public class TestExecDriver extends Test
rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
// reduce side work
- Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapredplan5.out", Utilities.defaultTd, false));
Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
@@ -441,7 +440,7 @@ public class TestExecDriver extends Test
rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
// reduce side work
- Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
+ Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir + File.separator
+ "mapredplan6.out", Utilities.defaultTd, false));
Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);
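All of the TestExecDriver churn above has one cause: the old tmpdir ended in File.separator, so bare concatenation like tmpdir + "mapplan1.out" produced a valid path; the new test.tmp.dir property carries no trailing separator, hence the mechanical + File.separator insertions. A small helper would have kept the diff quieter (a sketch, not part of the patch):

private static String inTmpDir(String name) {
  // tmpdir (test.tmp.dir) has no trailing separator
  return tmpdir + File.separator + name;
}
// e.g. new FileSinkDesc(inTmpDir("mapredplan1.out"), Utilities.defaultTd, false)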
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java Tue Nov 5 07:01:32 2013
@@ -24,11 +24,11 @@ import java.util.Map;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
-import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
import org.apache.hadoop.mapred.JobConf;
/**
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java?rev=1538880&r1=1538879&r2=1538880&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java Tue Nov 5 07:01:32 2013
@@ -31,12 +31,18 @@ import junit.framework.Assert;
import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColLessDoubleScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongScalarGreaterLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncLnDoubleToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncRoundDoubleToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncSinDoubleToDouble;
@@ -66,6 +72,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFSin;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
@@ -859,4 +866,60 @@ public class TestVectorizationContext {
ve = vc.getVectorExpression(tsFuncExpr);
Assert.assertEquals(VectorUDFUnixTimeStampLong.class, ve.getClass());
}
+
+ @Test
+ public void testBetweenFilters() throws HiveException {
+ ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
+ ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
+ ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo");
+
+ // string BETWEEN
+ GenericUDFBetween udf = new GenericUDFBetween();
+ ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc();
+ exprDesc.setGenericUDF(udf);
+ List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
+ children1.add(new ExprNodeConstantDesc(Boolean.FALSE)); // no NOT keyword
+ children1.add(col1Expr);
+ children1.add(constDesc);
+ children1.add(constDesc2);
+ exprDesc.setChildren(children1);
+
+ Map<String, Integer> columnMap = new HashMap<String, Integer>();
+ columnMap.put("col1", 1);
+ columnMap.put("col2", 2);
+ VectorizationContext vc = new VectorizationContext(columnMap, 2);
+ VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertTrue(ve instanceof FilterStringColumnBetween);
+
+ // string NOT BETWEEN
+ children1.set(0, new ExprNodeConstantDesc(Boolean.TRUE)); // has NOT keyword
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertTrue(ve instanceof FilterStringColumnNotBetween);
+
+ // long BETWEEN
+ children1.set(0, new ExprNodeConstantDesc(Boolean.FALSE));
+ children1.set(1, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
+ children1.set(2, new ExprNodeConstantDesc(10));
+ children1.set(3, new ExprNodeConstantDesc(20));
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertTrue(ve instanceof FilterLongColumnBetween);
+
+ // long NOT BETWEEN
+ children1.set(0, new ExprNodeConstantDesc(Boolean.TRUE));
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertTrue(ve instanceof FilterLongColumnNotBetween);
+
+ // double BETWEEN
+ children1.set(0, new ExprNodeConstantDesc(Boolean.FALSE));
+ children1.set(1, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
+ children1.set(2, new ExprNodeConstantDesc(10.0d));
+ children1.set(3, new ExprNodeConstantDesc(20.0d));
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertTrue(ve instanceof FilterDoubleColumnBetween);
+
+ // double NOT BETWEEN
+ children1.set(0, new ExprNodeConstantDesc(Boolean.TRUE));
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertTrue(ve instanceof FilterDoubleColumnNotBetween);
+ }
}
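For reference, the shape the new test relies on: GenericUDFBetween takes four children (notFlag, column, lower bound, upper bound), and the vectorizer picks a dedicated filter class per column type. In HiveQL terms (illustrative):

// WHERE col1 BETWEEN 'Alpha' AND 'Bravo'      -> FilterStringColumnBetween
// WHERE col1 NOT BETWEEN 'Alpha' AND 'Bravo'  -> FilterStringColumnNotBetween
// WHERE col1 BETWEEN 10 AND 20                -> FilterLongColumnBetween
// WHERE col1 NOT BETWEEN 10 AND 20            -> FilterLongColumnNotBetween
// WHERE col1 BETWEEN 10.0 AND 20.0            -> FilterDoubleColumnBetween
// WHERE col1 NOT BETWEEN 10.0 AND 20.0        -> FilterDoubleColumnNotBetween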