You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/26 09:19:34 UTC
svn commit: r1545564 [5/22] - in /hive/branches/tez: ./ ant/ beeline/ bin/
cli/ common/ common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/common/type/
common/src/java/org/apache/hadoop/hive/conf/ common/src/test/org/...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Nov 26 08:19:25 2013
@@ -33,7 +33,18 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -54,10 +65,64 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.*;
+import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.*;
+import org.apache.hadoop.hive.ql.udf.UDFAcos;
+import org.apache.hadoop.hive.ql.udf.UDFAsin;
+import org.apache.hadoop.hive.ql.udf.UDFAtan;
+import org.apache.hadoop.hive.ql.udf.UDFBin;
+import org.apache.hadoop.hive.ql.udf.UDFCeil;
+import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFCos;
+import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
+import org.apache.hadoop.hive.ql.udf.UDFDegrees;
+import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFFloor;
+import org.apache.hadoop.hive.ql.udf.UDFHex;
+import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLTrim;
+import org.apache.hadoop.hive.ql.udf.UDFLength;
+import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLn;
+import org.apache.hadoop.hive.ql.udf.UDFLog;
+import org.apache.hadoop.hive.ql.udf.UDFLog10;
+import org.apache.hadoop.hive.ql.udf.UDFLog2;
+import org.apache.hadoop.hive.ql.udf.UDFMinute;
+import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
+import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFPower;
+import org.apache.hadoop.hive.ql.udf.UDFRTrim;
+import org.apache.hadoop.hive.ql.udf.UDFRadians;
+import org.apache.hadoop.hive.ql.udf.UDFRand;
+import org.apache.hadoop.hive.ql.udf.UDFRegExp;
+import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSign;
+import org.apache.hadoop.hive.ql.udf.UDFSin;
+import org.apache.hadoop.hive.ql.udf.UDFSqrt;
+import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
+import org.apache.hadoop.hive.ql.udf.UDFTrim;
+import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
+import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
@@ -66,17 +131,23 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPDivide;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMinus;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMod;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFPosMod;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
@@ -111,11 +182,11 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(UDFOPNegative.class);
supportedGenericUDFs.add(UDFOPPositive.class);
- supportedGenericUDFs.add(UDFOPPlus.class);
- supportedGenericUDFs.add(UDFOPMinus.class);
- supportedGenericUDFs.add(UDFOPMultiply.class);
- supportedGenericUDFs.add(UDFOPDivide.class);
- supportedGenericUDFs.add(UDFOPMod.class);
+ supportedGenericUDFs.add(GenericUDFOPPlus.class);
+ supportedGenericUDFs.add(GenericUDFOPMinus.class);
+ supportedGenericUDFs.add(GenericUDFOPMultiply.class);
+ supportedGenericUDFs.add(GenericUDFOPDivide.class);
+ supportedGenericUDFs.add(GenericUDFOPMod.class);
supportedGenericUDFs.add(GenericUDFOPEqualOrLessThan.class);
supportedGenericUDFs.add(GenericUDFOPEqualOrGreaterThan.class);
@@ -161,8 +232,8 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(UDFLog10.class);
supportedGenericUDFs.add(UDFLog.class);
supportedGenericUDFs.add(UDFPower.class);
- supportedGenericUDFs.add(UDFPosMod.class);
supportedGenericUDFs.add(GenericUDFRound.class);
+ supportedGenericUDFs.add(GenericUDFPosMod.class);
supportedGenericUDFs.add(UDFSqrt.class);
supportedGenericUDFs.add(UDFSign.class);
supportedGenericUDFs.add(UDFRand.class);
@@ -461,8 +532,10 @@ public class Vectorizer implements Physi
case REDUCESINK:
ret = validateReduceSinkOperator((ReduceSinkOperator) op);
break;
- case FILESINK:
case TABLESCAN:
+ ret = validateTableScanOperator((TableScanOperator) op);
+ break;
+ case FILESINK:
case LIMIT:
ret = true;
break;
@@ -473,6 +546,11 @@ public class Vectorizer implements Physi
return ret;
}
+ private boolean validateTableScanOperator(TableScanOperator op) {
+ TableScanDesc desc = op.getConf();
+ return !desc.isGatherStats();
+ }
+
private boolean validateMapJoinOperator(MapJoinOperator op) {
MapJoinDesc desc = op.getConf();
byte posBigTable = (byte) desc.getPosBigTable();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Tue Nov 26 08:19:25 2013
@@ -1138,8 +1138,8 @@ public abstract class BaseSemanticAnalyz
Map<ASTNode, ExprNodeDesc> astExprNodeMap)
throws SemanticException, HiveException {
- if ((astNode == null) || (astNode.getChildren() == null) ||
- (astNode.getChildren().size() <= 1)) {
+ if ((astNode == null) || (astNode.getChildren() == null) ||
+ (astNode.getChildren().size() == 0)) {
return;
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java Tue Nov 26 08:19:25 2013
@@ -118,56 +118,6 @@ public class MapReduceCompiler extends T
}
}
- // loop over all the tasks recursively
- @Override
- protected void generateCountersTask(Task<? extends Serializable> task) {
- if (task instanceof ExecDriver) {
- HashMap<String, Operator<? extends OperatorDesc>> opMap = ((MapredWork) task
- .getWork()).getMapWork().getAliasToWork();
- if (!opMap.isEmpty()) {
- for (Operator<? extends OperatorDesc> op : opMap.values()) {
- generateCountersOperator(op);
- }
- }
-
- if (((MapredWork)task.getWork()).getReduceWork() != null) {
- Operator<? extends OperatorDesc> reducer = ((MapredWork) task.getWork()).getReduceWork()
- .getReducer();
- LOG.info("Generating counters for operator " + reducer);
- generateCountersOperator(reducer);
- }
- } else if (task instanceof ConditionalTask) {
- List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
- .getListTasks();
- for (Task<? extends Serializable> tsk : listTasks) {
- generateCountersTask(tsk);
- }
- }
-
- // Start the counters from scratch - a hack for hadoop 17.
- Operator.resetLastEnumUsed();
-
- if (task.getChildTasks() == null) {
- return;
- }
-
- for (Task<? extends Serializable> childTask : task.getChildTasks()) {
- generateCountersTask(childTask);
- }
- }
-
- private void generateCountersOperator(Operator<? extends OperatorDesc> op) {
- op.assignCounterNameToEnum();
-
- if (op.getChildOperators() == null) {
- return;
- }
-
- for (Operator<? extends OperatorDesc> child : op.getChildOperators()) {
- generateCountersOperator(child);
- }
- }
-
@Override
public ParseContext getParseContext(ParseContext pCtx, List<Task<? extends Serializable>> rootTasks) {
return new ParseContext(conf, pCtx.getQB(), pCtx.getParseTree(),
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Tue Nov 26 08:19:25 2013
@@ -935,6 +935,11 @@ public class PTFTranslator {
} else {
rr.put(cInfo.getTabAlias(), colAlias, cInfo);
}
+
+ String[] altMapping = inputRR.getAlternateMappings(inpCInfo.getInternalName());
+ if ( altMapping != null ) {
+ rr.put(altMapping[0], altMapping[1], cInfo);
+ }
}
return rr;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java Tue Nov 26 08:19:25 2013
@@ -200,8 +200,8 @@ public final class ParseUtils {
throw new SemanticException("Bad params for type decimal");
}
- int precision = HiveDecimal.DEFAULT_PRECISION;
- int scale = HiveDecimal.DEFAULT_SCALE;
+ int precision = HiveDecimal.USER_DEFAULT_PRECISION;
+ int scale = HiveDecimal.USER_DEFAULT_SCALE;
if (node.getChildCount() >= 1) {
String precStr = node.getChild(0).getText();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Tue Nov 26 08:19:25 2013
@@ -69,6 +69,14 @@ public class QB {
* If this QB represents a SubQuery predicate then this will point to the SubQuery object.
*/
private QBSubQuery subQueryPredicateDef;
+
+ /*
+ * used to give a unique name to each SubQuery QB Currently there can be at
+ * most 2 SubQueries in a Query: 1 in the Where clause, and 1 in the Having
+ * clause.
+ */
+ private int numSubQueryPredicates;
+
// results
@@ -320,5 +328,13 @@ public class QB {
protected QBSubQuery getSubQueryPredicateDef() {
return subQueryPredicateDef;
}
+
+ protected int getNumSubQueryPredicates() {
+ return numSubQueryPredicates;
+ }
+
+ protected int incrNumSubQueryPredicates() {
+ return ++numSubQueryPredicates;
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java Tue Nov 26 08:19:25 2013
@@ -4,11 +4,14 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
+import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -124,30 +127,56 @@ public class QBSubQuery {
public abstract ExprType combine(ExprType other);
}
+ /*
+ * This class captures the information about a
+ * conjunct in the where clause of the SubQuery.
+ * For an equality predicate it captures, for each side:
+ * - the AST
+ * - the type of Expression (basically what columns are referenced)
+ * - for Expressions that refer to the parent it captures the
+ * parent's ColumnInfo. In case of outer Aggregation expressions
+ * we need this to introduce a new mapping in the OuterQuery
+ * RowResolver. A join condition must use qualified column references,
+ * so we generate a new name for the aggr expression and use it in the
+ * joining condition.
+ * For e.g.
+ * having exists ( select x from R2 where y = min(R1.z) )
+ * where the expression 'min(R1.z)' is from the outer Query.
+ * We give this expression a new name like 'R1._gby_sq_col_1'
+ * and use the join condition: R1._gby_sq_col_1 = R2.y
+ */
static class Conjunct {
private final ASTNode leftExpr;
private final ASTNode rightExpr;
private final ExprType leftExprType;
private final ExprType rightExprType;
+ private final ColumnInfo leftOuterColInfo;
+ private final ColumnInfo rightOuterColInfo;
- public Conjunct(ASTNode leftExpr, ASTNode rightExpr, ExprType leftExprType,
- ExprType rightExprType) {
+ Conjunct(ASTNode leftExpr,
+ ASTNode rightExpr,
+ ExprType leftExprType,
+ ExprType rightExprType,
+ ColumnInfo leftOuterColInfo,
+ ColumnInfo rightOuterColInfo) {
super();
this.leftExpr = leftExpr;
this.rightExpr = rightExpr;
this.leftExprType = leftExprType;
this.rightExprType = rightExprType;
+ this.leftOuterColInfo = leftOuterColInfo;
+ this.rightOuterColInfo = rightOuterColInfo;
}
- public ASTNode getLeftExpr() {
+ ASTNode getLeftExpr() {
return leftExpr;
}
- public ASTNode getRightExpr() {
+ ASTNode getRightExpr() {
return rightExpr;
}
- public ExprType getLeftExprType() {
+ ExprType getLeftExprType() {
return leftExprType;
}
- public ExprType getRightExprType() {
+ ExprType getRightExprType() {
return rightExprType;
}
@@ -173,16 +202,28 @@ public class QBSubQuery {
}
return leftExprType.combine(rightExprType) == ExprType.REFERS_PARENT;
}
+ ColumnInfo getLeftOuterColInfo() {
+ return leftOuterColInfo;
+ }
+ ColumnInfo getRightOuterColInfo() {
+ return rightOuterColInfo;
+ }
}
class ConjunctAnalyzer {
RowResolver parentQueryRR;
+ boolean forHavingClause;
+ String parentQueryNewAlias;
NodeProcessor defaultExprProcessor;
Stack<Node> stack;
- ConjunctAnalyzer(RowResolver parentQueryRR) {
+ ConjunctAnalyzer(RowResolver parentQueryRR,
+ boolean forHavingClause,
+ String parentQueryNewAlias) {
this.parentQueryRR = parentQueryRR;
defaultExprProcessor = new DefaultExprProcessor();
+ this.forHavingClause = forHavingClause;
+ this.parentQueryNewAlias = parentQueryNewAlias;
stack = new Stack<Node>();
}
@@ -195,25 +236,34 @@ public class QBSubQuery {
* 3. All other expressions have a Type based on their children.
* An Expr w/o children is assumed to refer to neither.
*/
- private ExprType analyzeExpr(ASTNode expr) {
- ExprNodeDesc exprNode;
+ private ObjectPair<ExprType,ColumnInfo> analyzeExpr(ASTNode expr) {
+ ColumnInfo cInfo = null;
+ if ( forHavingClause ) {
+ try {
+ cInfo = parentQueryRR.getExpression(expr);
+ if ( cInfo != null) {
+ return ObjectPair.create(ExprType.REFERS_PARENT, cInfo);
+ }
+ } catch(SemanticException se) {
+ }
+ }
if ( expr.getType() == HiveParser.DOT) {
ASTNode dot = firstDot(expr);
- exprNode = resolveDot(dot);
- if ( exprNode != null ) {
- return ExprType.REFERS_PARENT;
+ cInfo = resolveDot(dot);
+ if ( cInfo != null ) {
+ return ObjectPair.create(ExprType.REFERS_PARENT, cInfo);
}
- return ExprType.REFERS_SUBQUERY;
+ return ObjectPair.create(ExprType.REFERS_SUBQUERY, null);
} else if ( expr.getType() == HiveParser.TOK_TABLE_OR_COL ) {
- return ExprType.REFERS_SUBQUERY;
+ return ObjectPair.create(ExprType.REFERS_SUBQUERY, null);
} else {
ExprType exprType = ExprType.REFERS_NONE;
int cnt = expr.getChildCount();
for(int i=0; i < cnt; i++) {
ASTNode child = (ASTNode) expr.getChild(i);
- exprType = exprType.combine(analyzeExpr(child));
+ exprType = exprType.combine(analyzeExpr(child).getFirst());
}
- return exprType;
+ return ObjectPair.create(exprType, null);
}
}
@@ -234,13 +284,17 @@ public class QBSubQuery {
if ( type == HiveParser.EQUAL ) {
ASTNode left = (ASTNode) conjunct.getChild(0);
ASTNode right = (ASTNode) conjunct.getChild(1);
- ExprType leftType = analyzeExpr(left);
- ExprType rightType = analyzeExpr(right);
+ ObjectPair<ExprType,ColumnInfo> leftInfo = analyzeExpr(left);
+ ObjectPair<ExprType,ColumnInfo> rightInfo = analyzeExpr(right);
- return new Conjunct(left, right, leftType, rightType);
+ return new Conjunct(left, right,
+ leftInfo.getFirst(), rightInfo.getFirst(),
+ leftInfo.getSecond(), rightInfo.getSecond());
} else {
- ExprType sqExprType = analyzeExpr(conjunct);
- return new Conjunct(conjunct, null, sqExprType, null);
+ ObjectPair<ExprType,ColumnInfo> sqExprInfo = analyzeExpr(conjunct);
+ return new Conjunct(conjunct, null,
+ sqExprInfo.getFirst(), null,
+ sqExprInfo.getSecond(), sqExprInfo.getSecond());
}
}
@@ -248,16 +302,20 @@ public class QBSubQuery {
* Try to resolve a qualified name as a column reference on the Parent Query's RowResolver.
* Apply this logic on the leftmost(first) dot in an AST tree.
*/
- protected ExprNodeDesc resolveDot(ASTNode node) {
+ protected ColumnInfo resolveDot(ASTNode node) {
try {
TypeCheckCtx tcCtx = new TypeCheckCtx(parentQueryRR);
String str = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(1).getText());
ExprNodeDesc idDesc = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str);
- return (ExprNodeDesc)
- defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc);
+ ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)
+ defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc);
+ if ( colDesc != null ) {
+ String[] qualName = parentQueryRR.reverseLookup(colDesc.getColumn());
+ return parentQueryRR.get(qualName[0], qualName[1]);
+ }
} catch(SemanticException se) {
- return null;
}
+ return null;
}
/*
@@ -295,6 +353,8 @@ public class QBSubQuery {
private int numOfCorrelationExprsAddedToSQSelect;
private boolean groupbyAddedToSQ;
+
+ private int numOuterCorrExprsForHaving;
public QBSubQuery(String outerQueryId,
int sqIdx,
@@ -311,6 +371,7 @@ public class QBSubQuery {
this.sqIdx = sqIdx;
this.alias = "sq_" + this.sqIdx;
this.numCorrExprsinSQ = 0;
+ this.numOuterCorrExprsForHaving = 0;
String s = ctx.getTokenRewriteStream().toString(
originalSQAST.getTokenStartIndex(), originalSQAST.getTokenStopIndex());
originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST);
@@ -328,7 +389,9 @@ public class QBSubQuery {
return operator;
}
- void validateAndRewriteAST(RowResolver outerQueryRR) throws SemanticException {
+ void validateAndRewriteAST(RowResolver outerQueryRR,
+ boolean forHavingClause,
+ String outerQueryAlias) throws SemanticException {
ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1);
@@ -359,7 +422,7 @@ public class QBSubQuery {
containsAggregationExprs = containsAggregationExprs | ( r == 1 );
}
- rewrite(outerQueryRR);
+ rewrite(outerQueryRR, forHavingClause, outerQueryAlias);
SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin);
@@ -418,14 +481,28 @@ public class QBSubQuery {
}
}
- void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR) throws SemanticException {
+ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR,
+ boolean forHavingClause,
+ String outerQueryAlias) throws SemanticException {
ASTNode parentQueryJoinCond = null;
if ( parentQueryExpression != null ) {
+
+ ColumnInfo outerQueryCol = null;
+ try {
+ outerQueryCol = outerQueryRR.getExpression(parentQueryExpression);
+ } catch(SemanticException se) {
+ }
+
parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond(
getOuterQueryExpression(),
alias,
sqRR);
+
+ if ( outerQueryCol != null ) {
+ rewriteCorrConjunctForHaving(parentQueryJoinCond, true,
+ outerQueryAlias, outerQueryRR, outerQueryCol);
+ }
}
joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST);
setJoinType();
@@ -494,8 +571,25 @@ public class QBSubQuery {
* expression to its GroupBy; add it to the front of the GroupBy.
* - If predicate is not correlated, let it remain in the SubQuery
* where clause.
+ * Additional things for Having clause:
+ * - A correlation predicate may refer to an aggregation expression.
+ * - This introduces 2 twists to the rewrite:
+ * a. When analyzing equality predicates we need to analyze each side
+ * to see if it is an aggregation expression from the Outer Query.
+ * So for e.g. this is a valid correlation predicate:
+ * R2.x = min(R1.y)
+ * Where R1 is an outer table reference, and R2 is a SubQuery table reference.
+ * b. When hoisting the correlation predicate to a join predicate, we need to
+ * rewrite it to be in the form the Join code allows: so the predicate needs
+ * to contain qualified column references.
+ * We handle this by generating a new name for the aggregation expression,
+ * like R1._gby_sq_col_1 and adding this mapping to the Outer Query's
+ * Row Resolver. Then we construct a joining predicate using this new
+ * name; so in our e.g. the condition would be: R2.x = R1._gby_sq_col_1
*/
- private void rewrite(RowResolver parentQueryRR) throws SemanticException {
+ private void rewrite(RowResolver parentQueryRR,
+ boolean forHavingClause,
+ String outerQueryAlias) throws SemanticException {
ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1);
ASTNode whereClause = null;
if ( subQueryAST.getChild(1).getChildCount() > 2 &&
@@ -511,7 +605,8 @@ public class QBSubQuery {
List<ASTNode> conjuncts = new ArrayList<ASTNode>();
SubQueryUtils.extractConjuncts(searchCond, conjuncts);
- ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR);
+ ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR,
+ forHavingClause, outerQueryAlias);
ASTNode sqNewSearchCond = null;
for(ASTNode conjunctAST : conjuncts) {
@@ -545,6 +640,10 @@ public class QBSubQuery {
ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias);
if ( conjunct.getLeftExprType().refersSubQuery() ) {
+ if ( forHavingClause && conjunct.getRightOuterColInfo() != null ) {
+ rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias,
+ parentQueryRR, conjunct.getRightOuterColInfo());
+ }
ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate(
conjunctAST, sqExprForCorr, true);
joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate);
@@ -557,6 +656,10 @@ public class QBSubQuery {
SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr());
}
} else {
+ if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) {
+ rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias,
+ parentQueryRR, conjunct.getLeftOuterColInfo());
+ }
ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate(
conjunctAST, sqExprForCorr, false);
joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate);
@@ -642,4 +745,21 @@ public class QBSubQuery {
public int getNumOfCorrelationExprsAddedToSQSelect() {
return numOfCorrelationExprsAddedToSQSelect;
}
+
+ private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode,
+ boolean refersLeft,
+ String outerQueryAlias,
+ RowResolver outerQueryRR,
+ ColumnInfo outerQueryCol) {
+
+ String newColAlias = "_gby_sq_col_" + numOuterCorrExprsForHaving++;
+ ASTNode outerExprForCorr = SubQueryUtils.createColRefAST(outerQueryAlias, newColAlias);
+ if ( refersLeft ) {
+ conjunctASTNode.setChild(0, outerExprForCorr);
+ } else {
+ conjunctASTNode.setChild(1, outerExprForCorr);
+ }
+ outerQueryRR.put(outerQueryAlias, newColAlias, outerQueryCol);
+ }
+
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java Tue Nov 26 08:19:25 2013
@@ -43,6 +43,13 @@ public class RowResolver implements Seri
private HashMap<String, LinkedHashMap<String, ColumnInfo>> rslvMap;
private HashMap<String, String[]> invRslvMap;
+ /*
+ * now a Column can have an alternate mapping.
+ * This captures the alternate mapping.
+ * The primary(first) mapping is still only held in
+ * invRslvMap.
+ */
+ private Map<String, String[]> altInvRslvMap;
private Map<String, ASTNode> expressionMap;
// TODO: Refactor this and do in a more object oriented manner
@@ -55,6 +62,7 @@ public class RowResolver implements Seri
rowSchema = new RowSchema();
rslvMap = new HashMap<String, LinkedHashMap<String, ColumnInfo>>();
invRslvMap = new HashMap<String, String[]>();
+ altInvRslvMap = new HashMap<String, String[]>();
expressionMap = new HashMap<String, ASTNode>();
isExprResolver = false;
}
@@ -96,8 +104,17 @@ public class RowResolver implements Seri
if (rowSchema.getSignature() == null) {
rowSchema.setSignature(new ArrayList<ColumnInfo>());
}
-
- rowSchema.getSignature().add(colInfo);
+
+ /*
+ * allow multiple mappings to the same ColumnInfo.
+ * When a ColumnInfo is mapped multiple times, only the
+ * first inverse mapping is captured.
+ */
+ boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName());
+
+ if ( !colPresent ) {
+ rowSchema.getSignature().add(colInfo);
+ }
LinkedHashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias);
if (f_map == null) {
@@ -109,7 +126,11 @@ public class RowResolver implements Seri
String[] qualifiedAlias = new String[2];
qualifiedAlias[0] = tab_alias;
qualifiedAlias[1] = col_alias;
- invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+ if ( !colPresent ) {
+ invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+ } else {
+ altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+ }
}
public boolean hasTableAlias(String tab_alias) {
@@ -149,14 +170,21 @@ public class RowResolver implements Seri
ret = f_map.get(col_alias);
} else {
boolean found = false;
- for (LinkedHashMap<String, ColumnInfo> cmap : rslvMap.values()) {
+ String foundTbl = null;
+ for (Map.Entry<String, LinkedHashMap<String, ColumnInfo>> rslvEntry: rslvMap.entrySet()) {
+ String rslvKey = rslvEntry.getKey();
+ LinkedHashMap<String, ColumnInfo> cmap = rslvEntry.getValue();
for (Map.Entry<String, ColumnInfo> cmapEnt : cmap.entrySet()) {
if (col_alias.equalsIgnoreCase(cmapEnt.getKey())) {
- if (found) {
+ /*
+ * We can have an unaliased and one aliased mapping to a Column.
+ */
+ if (found && foundTbl != null && rslvKey != null) {
throw new SemanticException("Column " + col_alias
+ " Found in more than One Tables/Subqueries");
}
found = true;
+ foundTbl = rslvKey == null ? foundTbl : rslvKey;
ret = cmapEnt.getValue();
}
}
@@ -260,6 +288,10 @@ public class RowResolver implements Seri
public boolean getIsExprResolver() {
return isExprResolver;
}
+
+ public String[] getAlternateMappings(String internalName) {
+ return altInvRslvMap.get(internalName);
+ }
@Override
public String toString() {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Nov 26 08:19:25 2013
@@ -1897,7 +1897,8 @@ public class SemanticAnalyzer extends Ba
}
@SuppressWarnings("nls")
- private Operator genHavingPlan(String dest, QB qb, Operator input)
+ private Operator genHavingPlan(String dest, QB qb, Operator input,
+ Map<String, Operator> aliasToOpInfo)
throws SemanticException {
ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
@@ -1912,21 +1913,24 @@ public class SemanticAnalyzer extends Ba
}
ASTNode condn = (ASTNode) havingExpr.getChild(0);
- Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
- new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
- inputRR.getColumnInfos()), input), inputRR);
-
+ /*
+ * Now a having clause can contain a SubQuery predicate;
+ * so we invoke genFilterPlan to handle SubQuery algebraic transformation,
+ * just as is done for SubQuery predicates appearing in the Where Clause.
+ */
+ Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true);
+ output = putOpInsertMap(output, inputRR);
return output;
}
@SuppressWarnings("nls")
- private Operator genFilterPlan(String dest, QB qb, Operator input,
- Map<String, Operator> aliasToOpInfo)
+ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
+ Map<String, Operator> aliasToOpInfo,
+ boolean forHavingClause)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
- ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
/*
* Handling of SubQuery Expressions:
@@ -1949,8 +1953,16 @@ public class SemanticAnalyzer extends Ba
* --> ===CONTINUE_FILTER_PROCESSING===
* endif
* endif
+ *
+ * Support for Sub Queries in Having Clause:
+ * - By and large this works the same way as SubQueries in the Where Clause.
+ * - The one addendum is the handling of aggregation expressions from the Outer Query
+ * appearing in correlation clauses.
+ * - So such correlating predicates are allowed:
+ * min(OuterQuery.x) = SubQuery.y
+ * - this requires special handling when converting to joins. See QBSubQuery.rewrite
+ * method for detailed comments.
*/
- ASTNode searchCond = (ASTNode) whereExpr.getChild(0);
List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
if ( subQueriesInOriginalTree.size() > 0 ) {
@@ -1982,13 +1994,20 @@ public class SemanticAnalyzer extends Ba
ASTNode subQueryAST = subQueries.get(i);
ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
- int sqIdx = i+1;
+ int sqIdx = qb.incrNumSubQueryPredicates();
clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(),
sqIdx, subQueryAST, originalSubQueryAST, ctx);
+
+ String havingInputAlias = null;
+
+ if ( forHavingClause ) {
+ havingInputAlias = "gby_sq" + sqIdx;
+ aliasToOpInfo.put(havingInputAlias, input);
+ }
- subQuery.validateAndRewriteAST(inputRR);
+ subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias);
QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
qbSQ.setSubQueryDef(subQuery);
@@ -2014,7 +2033,7 @@ public class SemanticAnalyzer extends Ba
/*
* Gen Join between outer Operator and SQ op
*/
- subQuery.buildJoinCondition(inputRR, sqRR);
+ subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
QBJoinTree joinTree = genSQJoinTree(qb, subQuery,
input,
aliasToOpInfo);
@@ -2084,6 +2103,12 @@ public class SemanticAnalyzer extends Ba
if (!aliases.contains("")) {
aliases.add("");
}
+ /*
+ * track the input ColumnInfos that are added to the output.
+ * if a columnInfo has multiple mappings; then add the column only once,
+ * but carry the mappings forward.
+ */
+ Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
for (String alias : aliases) {
@@ -2112,16 +2137,21 @@ public class SemanticAnalyzer extends Ba
continue;
}
- ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
- name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
if (subQuery) {
output.checkColumn(tmp[0], tmp[1]);
}
- col_list.add(expr);
- output.put(tmp[0], tmp[1],
- new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
- colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
- colInfo.isHiddenVirtualCol()));
+ ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
+ if (oColInfo == null) {
+ ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
+ name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
+ colInfo.isSkewedCol());
+ col_list.add(expr);
+ oColInfo = new ColumnInfo(getColumnInternalName(pos),
+ colInfo.getType(), colInfo.getTabAlias(),
+ colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+ inputColsProcessed.put(colInfo, oColInfo);
+ }
+ output.put(tmp[0], tmp[1], oColInfo);
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
@@ -2916,6 +2946,14 @@ public class SemanticAnalyzer extends Ba
colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
.isSkewedCol() : false);
out_rwsch.put(tabAlias, colAlias, colInfo);
+
+ if ( exp instanceof ExprNodeColumnDesc ) {
+ ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
+ String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
+ if ( altMapping != null ) {
+ out_rwsch.put(altMapping[0], altMapping[1], colInfo);
+ }
+ }
pos = Integer.valueOf(pos.intValue() + 1);
}
@@ -3177,8 +3215,10 @@ public class SemanticAnalyzer extends Ba
.getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
+ ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
groupByOutputRowResolver.putExpression(grpbyExpr,
- new ColumnInfo(field, exprInfo.getType(), null, false));
+ oColInfo);
+ addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
@@ -3386,8 +3426,10 @@ public class SemanticAnalyzer extends Ba
.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
+ ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver.putExpression(grpbyExpr,
- new ColumnInfo(field, exprInfo.getType(), "", false));
+ oColInfo);
+ addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
@@ -4168,8 +4210,10 @@ public class SemanticAnalyzer extends Ba
exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
+ ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver2.putExpression(grpbyExpr,
- new ColumnInfo(field, exprInfo.getType(), "", false));
+ oColInfo);
+ addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
@@ -4385,14 +4429,15 @@ public class SemanticAnalyzer extends Ba
curr = forwardOp;
if (parseInfo.getWhrForClause(dest) != null) {
- curr = genFilterPlan(dest, qb, forwardOp, aliasToOpInfo);
+ ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
+ curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false);
}
// Generate GroupbyOperator
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
- curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb);
+ curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb, aliasToOpInfo);
}
return curr;
@@ -5229,11 +5274,7 @@ public class SemanticAnalyzer extends Ba
qbm.setDPCtx(dest, dpCtx);
}
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP
- // turn on hive.task.progress to update # of partitions created to the JT
- HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS, true);
-
- } else { // QBMetaData.DEST_PARTITION capture the all-SP case
+ if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getDestForClause(dest),
ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg()));
@@ -7753,7 +7794,8 @@ public class SemanticAnalyzer extends Ba
curr = inputs.get(dest);
if (qbp.getWhrForClause(dest) != null) {
- curr = genFilterPlan(dest, qb, curr, aliasToOpInfo);
+ ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
+ curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false);
}
if (qbp.getAggregationExprsForClause(dest).size() != 0
@@ -7780,7 +7822,7 @@ public class SemanticAnalyzer extends Ba
}
}
- curr = genPostGroupByBodyPlan(curr, dest, qb);
+ curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo);
}
} else {
curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo);
@@ -7806,7 +7848,8 @@ public class SemanticAnalyzer extends Ba
return inputs;
}
- private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb)
+ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb,
+ Map<String, Operator> aliasToOpInfo)
throws SemanticException {
QBParseInfo qbp = qb.getParseInfo();
@@ -7816,7 +7859,7 @@ public class SemanticAnalyzer extends Ba
if (getGroupByForClause(qbp, dest).size() == 0) {
throw new SemanticException("HAVING specified without GROUP BY");
}
- curr = genHavingPlan(dest, qb, curr);
+ curr = genHavingPlan(dest, qb, curr, aliasToOpInfo);
}
@@ -10707,7 +10750,10 @@ public class SemanticAnalyzer extends Ba
outColName, colInfo.getType(), alias[0],
colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
rsNewRR.put(alias[0], alias[1], newColInfo);
-
+ String[] altMapping = inputRR.getAlternateMappings(colInfo.getInternalName());
+ if ( altMapping != null ) {
+ rsNewRR.put(altMapping[0], altMapping[1], newColInfo);
+ }
}
input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
@@ -10753,6 +10799,10 @@ public class SemanticAnalyzer extends Ba
colsAddedByHaving.put(alias, eColInfo);
}
}
+ String[] altMapping = inputRR.getAlternateMappings(colInfo.getInternalName());
+ if ( altMapping != null ) {
+ extractRR.put(altMapping[0], altMapping[1], eColInfo);
+ }
}
for(Map.Entry<String[], ColumnInfo> columnAddedByHaving : colsAddedByHaving.entrySet() ) {
@@ -10821,4 +10871,40 @@ public class SemanticAnalyzer extends Ba
return selSpec;
}
+ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
+ Operator<? extends OperatorDesc> reduceSinkOp, RowResolver gByRR) {
+ if ( gByExpr.getType() == HiveParser.DOT
+ && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) {
+ String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr
+ .getChild(0).getChild(0).getText());
+ String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(
+ gByExpr.getChild(1).getText());
+ gByRR.put(tab_alias, col_alias, colInfo);
+ } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) {
+ String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr
+ .getChild(0).getText());
+ String tab_alias = null;
+ /*
+ * If the input to the GBy has a tab alias for the column, then add an entry
+ * based on that tab_alias.
+ * For e.g. this query:
+ * select b.x, count(*) from t1 b group by x
+ * needs (tab_alias=b, col_alias=x) in the GBy RR.
+ * tab_alias=b comes from looking at the RowResolver that is the ancestor
+ * before any GBy/ReduceSinks added for the GBY operation.
+ */
+ Operator<? extends OperatorDesc> parent = reduceSinkOp;
+ while ( parent instanceof ReduceSinkOperator ||
+ parent instanceof GroupByOperator ) {
+ parent = parent.getParentOperators().get(0);
+ }
+ RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
+ try {
+ ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias);
+ tab_alias = pColInfo == null ? null : pColInfo.getTabAlias();
+ } catch(SemanticException se) {
+ }
+ gByRR.put(tab_alias, col_alias, colInfo);
+ }
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Tue Nov 26 08:19:25 2013
@@ -211,13 +211,6 @@ public abstract class TaskCompiler {
optimizeTaskPlan(rootTasks, pCtx, ctx);
- // For each operator, generate the counters if needed
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) {
- for (Task<? extends Serializable> rootTask : rootTasks) {
- generateCountersTask(rootTask);
- }
- }
-
decideExecMode(rootTasks, ctx, globalLimitCtx);
if (qb.isCTAS()) {
@@ -340,11 +333,6 @@ public abstract class TaskCompiler {
GlobalLimitCtx globalLimitCtx) throws SemanticException;
/*
- * Called to setup counters for the generated tasks
- */
- protected abstract void generateCountersTask(Task<? extends Serializable> rootTask);
-
- /*
* Called at the beginning of the compile phase to have another chance to optimize the operator plan
*/
protected void optimizeOperatorPlan(ParseContext pCtxSet, Set<ReadEntity> inputs,
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Tue Nov 26 08:19:25 2013
@@ -96,14 +96,6 @@ public class TezCompiler extends TaskCom
opRules.put(new RuleRegExp(new String("Convert Join to Map-join"),
JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
- // if this is an explain statement add rule to generate statistics for
- // the whole tree.
- if (pCtx.getContext().getExplain()) {
- opRules.put(new RuleRegExp(new String("Set statistics - FileSink"),
- FileSinkOperator.getOperatorName() + "%"),
- new SetStatistics());
- }
-
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
@@ -242,48 +234,6 @@ public class TezCompiler extends TaskCom
}
@Override
- protected void generateCountersTask(Task<? extends Serializable> task) {
- if (task instanceof TezTask) {
- TezWork work = ((TezTask)task).getWork();
- List<BaseWork> workItems = work.getAllWork();
- for (BaseWork w: workItems) {
- List<Operator<?>> ops = w.getAllOperators();
- for (Operator<?> op: ops) {
- generateCountersOperator(op);
- }
- }
- } else if (task instanceof ConditionalTask) {
- List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
- .getListTasks();
- for (Task<? extends Serializable> tsk : listTasks) {
- generateCountersTask(tsk);
- }
- }
-
- Operator.resetLastEnumUsed();
-
- if (task.getChildTasks() == null) {
- return;
- }
-
- for (Task<? extends Serializable> childTask : task.getChildTasks()) {
- generateCountersTask(childTask);
- }
- }
-
- private void generateCountersOperator(Operator<?> op) {
- op.assignCounterNameToEnum();
-
- if (op.getChildOperators() == null) {
- return;
- }
-
- for (Operator<?> child : op.getChildOperators()) {
- generateCountersOperator(child);
- }
- }
-
- @Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
GlobalLimitCtx globalLimitCtx)
throws SemanticException {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Tue Nov 26 08:19:25 2013
@@ -63,7 +63,6 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java Tue Nov 26 08:19:25 2013
@@ -19,9 +19,9 @@
package org.apache.hadoop.hive.ql.plan;
public class AbstractOperatorDesc implements OperatorDesc {
- protected transient Statistics statistics;
private boolean vectorMode = false;
+ protected transient Statistics statistics;
@Override
@Explain(displayName = "Statistics", normalExplain = false)
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Tue Nov 26 08:19:25 2013
@@ -133,7 +133,7 @@ public class ExprNodeGenericFuncDesc ext
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append(genericUDF.getClass().toString());
+ sb.append(genericUDF.getClass().getSimpleName());
sb.append("(");
for (int i = 0; i < chidren.size(); i++) {
if (i > 0) {
@@ -141,7 +141,6 @@ public class ExprNodeGenericFuncDesc ext
}
sb.append(chidren.get(i).toString());
}
- sb.append("(");
sb.append(")");
return sb.toString();
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java Tue Nov 26 08:19:25 2013
@@ -187,6 +187,7 @@ public class JoinDesc extends AbstractOp
this.tagOrder = clone.tagOrder;
this.filters = clone.filters;
this.filterMap = clone.filterMap;
+ this.statistics = clone.statistics;
}
public Map<Byte, List<ExprNodeDesc>> getExprs() {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java Tue Nov 26 08:19:25 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
+import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
/**
@@ -49,6 +50,8 @@ public class StatsWork implements Serial
private boolean isPartialScanAnalyzeCommand = false;
+ private transient Task<?> sourceTask;
+
public StatsWork() {
}
@@ -140,4 +143,12 @@ public class StatsWork implements Serial
public void setPartialScanAnalyzeCommand(boolean isPartialScanAnalyzeCommand) {
this.isPartialScanAnalyzeCommand = isPartialScanAnalyzeCommand;
}
+
+ public Task<?> getSourceTask() {
+ return sourceTask;
+ }
+
+ public void setSourceTask(Task<?> sourceTask) {
+ this.sourceTask = sourceTask;
+ }
}
Copied: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java (from r1545525, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java)
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java?p2=hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java&p1=hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java&r1=1545525&r2=1545564&rev=1545564&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java Tue Nov 26 08:19:25 2013
@@ -24,6 +24,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
@@ -38,10 +39,10 @@ public class CounterStatsAggregator impl
private JobClient jc;
@Override
- public boolean connect(Configuration hconf, MapRedTask sourceTask) {
+ public boolean connect(Configuration hconf, Task<?> sourceTask) {
try {
jc = new JobClient(toJobConf(hconf));
- RunningJob job = jc.getJob(sourceTask.getJobID());
+ RunningJob job = jc.getJob(((MapRedTask)sourceTask).getJobID());
if (job != null) {
counters = job.getCounters();
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsAggregator.java Tue Nov 26 08:19:25 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.stats;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.Task;
/**
* An interface for any possible implementation for gathering statistics.
@@ -31,9 +32,10 @@ public interface StatsAggregator {
*
* @param hconf
* HiveConf that contains the connection parameters.
+ * @param sourceTask
* @return true if connection is successful, false otherwise.
*/
- public boolean connect(Configuration hconf);
+ public boolean connect(Configuration hconf, Task<?> sourceTask);
/**
* This method aggregates a given statistic from all tasks (partial stats).
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsFactory.java Tue Nov 26 08:19:25 2013
@@ -24,7 +24,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
-import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.util.ReflectionUtils;
@@ -36,9 +36,13 @@ public final class StatsFactory {
static final private Log LOG = LogFactory.getLog(StatsFactory.class.getName());
- private static Class <? extends Serializable> publisherImplementation;
- private static Class <? extends Serializable> aggregatorImplementation;
- private static Configuration jobConf;
+ private Class <? extends Serializable> publisherImplementation;
+ private Class <? extends Serializable> aggregatorImplementation;
+ private Configuration jobConf;
+
+ public static StatsFactory newFactory(Configuration conf) {
+ return newFactory(HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS), conf);
+ }
/**
* Sets the paths of the implementation classes of publishing
@@ -46,53 +50,30 @@ public final class StatsFactory {
* The paths are determined according to a configuration parameter which
* is passed as the user input for choosing the implementation as MySQL, HBase, ...
*/
- public static boolean setImplementation(String configurationParam, Configuration conf) {
+ public static StatsFactory newFactory(String configurationParam, Configuration conf) {
+ StatsFactory factory = new StatsFactory(conf);
+ if (factory.initialize(configurationParam.toLowerCase())) {
+ return factory;
+ }
+ return null;
+ }
+ private StatsFactory(Configuration conf) {
+ this.jobConf = conf;
+ }
+
+ private boolean initialize(String type) {
ClassLoader classLoader = JavaUtils.getClassLoader();
- if (configurationParam.equals(StatsSetupConst.HBASE_IMPL_CLASS_VAL)) {
- // Case: hbase
- try {
- publisherImplementation = (Class<? extends Serializable>)
- Class.forName("org.apache.hadoop.hive.hbase.HBaseStatsPublisher", true, classLoader);
-
- aggregatorImplementation = (Class<? extends Serializable>)
- Class.forName("org.apache.hadoop.hive.hbase.HBaseStatsAggregator", true, classLoader);
- } catch (ClassNotFoundException e) {
- LOG.error("HBase Publisher/Aggregator classes cannot be loaded.", e);
- return false;
- }
- } else if (configurationParam.contains(StatsSetupConst.JDBC_IMPL_CLASS_VAL)) {
- // Case: jdbc:mysql or jdbc:derby
- try {
- publisherImplementation = (Class<? extends Serializable>)
- Class.forName("org.apache.hadoop.hive.ql.stats.jdbc.JDBCStatsPublisher", true, classLoader);
-
- aggregatorImplementation = (Class<? extends Serializable>)
- Class.forName("org.apache.hadoop.hive.ql.stats.jdbc.JDBCStatsAggregator", true, classLoader);
- } catch (ClassNotFoundException e) {
- LOG.error("JDBC Publisher/Aggregator classes cannot be loaded.", e);
- return false;
- }
- } else {
- // try default stats publisher/aggregator
- String defPublisher = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_DEFAULT_PUBLISHER);
- String defAggregator = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_DEFAULT_AGGREGATOR);
- // ERROR no default publisher/aggregator is defined
- if (defPublisher == null || defAggregator == null) {
- return false;
- }
- try{
- publisherImplementation = (Class<? extends Serializable>)
- Class.forName(defPublisher, true, classLoader);
- aggregatorImplementation = (Class<? extends Serializable>)
- Class.forName(defAggregator, true, classLoader);
- } catch (ClassNotFoundException e) {
- LOG.error("JDBC Publisher/Aggregator classes cannot be loaded.", e);
- return false;
- }
+ try {
+ StatDB statDB = type.startsWith("jdbc") ? StatDB.jdbc : StatDB.valueOf(type);
+ publisherImplementation = (Class<? extends Serializable>)
+ Class.forName(statDB.getPublisher(jobConf), true, classLoader);
+ aggregatorImplementation = (Class<? extends Serializable>)
+ Class.forName(statDB.getAggregator(jobConf), true, classLoader);
+ } catch (Exception e) {
+ LOG.error(type + " Publisher/Aggregator classes cannot be loaded.", e);
+ return false;
}
-
- jobConf = conf;
return true;
}
@@ -100,7 +81,7 @@ public final class StatsFactory {
* Returns a Stats publisher implementation class for the IStatsPublisher interface
* For example HBaseStatsPublisher for the HBase implementation
*/
- public static StatsPublisher getStatsPublisher() {
+ public StatsPublisher getStatsPublisher() {
return (StatsPublisher) ReflectionUtils.newInstance(publisherImplementation, jobConf);
}
@@ -109,7 +90,7 @@ public final class StatsFactory {
* Returns a Stats Aggregator implementation class for the IStatsAggregator interface
* For example HBaseStatsAggregator for the HBase implementation
*/
- public static StatsAggregator getStatsAggregator() {
+ public StatsAggregator getStatsAggregator() {
return (StatsAggregator) ReflectionUtils.newInstance(aggregatorImplementation, jobConf);
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java Tue Nov 26 08:19:25 2013
@@ -32,6 +32,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
@@ -40,6 +41,7 @@ public class JDBCStatsAggregator impleme
private Connection conn;
private String connectionString;
private Configuration hiveconf;
+ private Task<?> sourceTask;
private final Map<String, PreparedStatement> columnMapping;
private final Log LOG = LogFactory.getLog(this.getClass().getName());
private int timeout = 30;
@@ -53,7 +55,7 @@ public class JDBCStatsAggregator impleme
}
@Override
- public boolean connect(Configuration hiveconf) {
+ public boolean connect(Configuration hiveconf, Task<?> sourceTask) {
this.hiveconf = hiveconf;
timeout = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_JDBC_TIMEOUT);
connectionString = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSDBCONNECTIONSTRING);
@@ -157,7 +159,7 @@ public class JDBCStatsAggregator impleme
} catch (InterruptedException iex) {
}
// getting a new connection
- if (!connect(hiveconf)) {
+ if (!connect(hiveconf, sourceTask)) {
// if cannot reconnect, just fail because connect() already handles retries.
LOG.error("Error during publishing aggregation. " + e);
return null;
@@ -235,7 +237,7 @@ public class JDBCStatsAggregator impleme
} catch (InterruptedException iex) {
}
// getting a new connection
- if (!connect(hiveconf)) {
+ if (!connect(hiveconf, sourceTask)) {
LOG.error("Error during clean-up. " + e);
return false;
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java Tue Nov 26 08:19:25 2013
@@ -185,7 +185,7 @@ public class GenericUDFBridge extends Ge
// If the returned value is HiveDecimal, we assume maximum precision/scale.
if (result != null && result instanceof HiveDecimalWritable) {
result = HiveDecimalUtils.enforcePrecisionScale((HiveDecimalWritable) result,
- HiveDecimal.MAX_PRECISION, HiveDecimal.MAX_SCALE);
+ HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
}
return result;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java Tue Nov 26 08:19:25 2013
@@ -25,11 +25,11 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
- * GenericUDF Class for SQL construct
- * "CASE WHEN a THEN b WHEN c THEN d [ELSE f] END".
+ * GenericUDF Class for SQL construct "CASE a WHEN b THEN c [ELSE f] END".
*
- * NOTES: 1. a and c should be boolean, or an exception will be thrown. 2. b, d
- * and f should have the same TypeInfo, or an exception will be thrown.
+ * NOTES: 1. a and b should be compatible, or an exception will be
+ * thrown. 2. c and f should be compatible types, or an exception will be
+ * thrown.
*/
public class GenericUDFCase extends GenericUDF {
private transient ObjectInspector[] argumentOIs;
@@ -40,8 +40,8 @@ public class GenericUDFCase extends Gene
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException {
argumentOIs = arguments;
- caseOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver();
- returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver();
+ caseOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+ returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
boolean r = caseOIResolver.update(arguments[0]);
assert (r);
@@ -79,12 +79,13 @@ public class GenericUDFCase extends Gene
Object exprValue = arguments[0].get();
for (int i = 1; i + 1 < arguments.length; i += 2) {
Object caseKey = arguments[i].get();
- if (PrimitiveObjectInspectorUtils.comparePrimitiveObjects(exprValue,
- (PrimitiveObjectInspector) argumentOIs[0], caseKey,
- (PrimitiveObjectInspector) argumentOIs[i])) {
+ // May need to convert to common type to compare
+ PrimitiveObjectInspector caseOI = (PrimitiveObjectInspector) caseOIResolver.get();
+ if (PrimitiveObjectInspectorUtils.comparePrimitiveObjects(
+ caseOIResolver.convertIfNecessary(exprValue, argumentOIs[0]), caseOI,
+ caseOIResolver.convertIfNecessary(caseKey, argumentOIs[i]), caseOI)) {
Object caseValue = arguments[i + 1].get();
- return returnOIResolver.convertIfNecessary(caseValue,
- argumentOIs[i + 1]);
+ return returnOIResolver.convertIfNecessary(caseValue, argumentOIs[i + 1]);
}
}
// Process else statement
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java Tue Nov 26 08:19:25 2013
@@ -43,7 +43,10 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
@@ -140,6 +143,18 @@ public final class GenericUDFUtils {
return false;
}
+ /**
+ * TODO: Hack fix until HIVE-5848 is addressed. non-exact type shouldn't be promoted
+ * to exact type, as FunctionRegistry.getCommonClass() might do. This corrects
+ * that.
+ */
+ if (commonTypeInfo instanceof DecimalTypeInfo) {
+ if ((!FunctionRegistry.isExactNumericType((PrimitiveTypeInfo) oiTypeInfo)) ||
+ (!FunctionRegistry.isExactNumericType((PrimitiveTypeInfo) rTypeInfo))) {
+ commonTypeInfo = TypeInfoFactory.doubleTypeInfo;
+ }
+ }
+
returnObjectInspector = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java Tue Nov 26 08:19:25 2013
@@ -25,11 +25,11 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
/**
- * GenericUDF Class for SQL construct "CASE a WHEN b THEN c [ELSE f] END".
+ * GenericUDF Class for SQL construct
+ * "CASE WHEN a THEN b WHEN c THEN d [ELSE f] END".
*
- * NOTES: 1. a and b should have the same TypeInfo, or an exception will be
- * thrown. 2. c and f should have the same TypeInfo, or an exception will be
- * thrown.
+ * NOTES: 1. a and c should be boolean, or an exception will be thrown. 2. b, d
+ * and f should be common types, or an exception will be thrown.
*/
public class GenericUDFWhen extends GenericUDF {
private transient ObjectInspector[] argumentOIs;
@@ -39,7 +39,7 @@ public class GenericUDFWhen extends Gene
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException {
argumentOIs = arguments;
- returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver();
+ returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
for (int i = 0; i + 1 < arguments.length; i += 2) {
if (!arguments[i].getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java Tue Nov 26 08:19:25 2013
@@ -267,6 +267,15 @@ public enum JavaDataModel {
public int lengthForBooleanArrayOfSize(int length) {
return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
}
+ public int lengthForTimestampArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length);
+ }
+ public int lengthForDateArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(lengthOfDate(), length);
+ }
+ public int lengthForDecimalArrayOfSize(int length) {
+ return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length);
+ }
public int lengthOfDecimal() {
// object overhead + 8 bytes for intCompact + 4 bytes for precision
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Tue Nov 26 08:19:25 2013
@@ -222,7 +222,7 @@ public class TestFunctionRegistry extend
common(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo,
TypeInfoFactory.stringTypeInfo);
common(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo,
- TypeInfoFactory.getDecimalTypeInfo(65, 30));
+ TypeInfoFactory.decimalTypeInfo);
common(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
TypeInfoFactory.stringTypeInfo);
@@ -244,7 +244,7 @@ public class TestFunctionRegistry extend
comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo,
TypeInfoFactory.decimalTypeInfo);
comparison(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo,
- TypeInfoFactory.getDecimalTypeInfo(65, 30));
+ TypeInfoFactory.decimalTypeInfo);
comparison(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
TypeInfoFactory.doubleTypeInfo);
@@ -319,7 +319,7 @@ public class TestFunctionRegistry extend
unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo,
TypeInfoFactory.decimalTypeInfo);
unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo,
- TypeInfoFactory.getDecimalTypeInfo(65, 30));
+ TypeInfoFactory.decimalTypeInfo);
unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
TypeInfoFactory.stringTypeInfo);
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Tue Nov 26 08:19:25 2013
@@ -116,7 +116,7 @@ public class TestOperators extends TestC
new ObjectInspector[] {r[0].oi});
for (InspectableObject oner : r) {
- op.process(oner.o, 0);
+ op.processOp(oner.o, 0);
}
Map<Enum<?>, Long> results = op.getStats();
@@ -277,7 +277,7 @@ public class TestOperators extends TestC
// evaluate on row
for (int i = 0; i < 5; i++) {
- op.process(r[i].o, 0);
+ op.processOp(r[i].o, 0);
}
op.close(false);
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java?rev=1545564&r1=1545563&r2=1545564&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/exec/TestStatsPublisherEnhanced.java Tue Nov 26 08:19:25 2013
@@ -41,12 +41,14 @@ public class TestStatsPublisherEnhanced
protected String statsImplementationClass;
protected Map<String, String> stats;
+ protected StatsFactory factory;
+
public TestStatsPublisherEnhanced(String name) {
super(name);
conf = new JobConf(TestStatsPublisherEnhanced.class);
conf.set("hive.stats.dbclass", "jdbc:derby");
- statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
- StatsFactory.setImplementation(statsImplementationClass, conf);
+ factory = StatsFactory.newFactory(conf);
+ assert factory != null;
}
@Override
@@ -56,9 +58,9 @@ public class TestStatsPublisherEnhanced
@Override
protected void tearDown() {
- StatsAggregator sa = StatsFactory.getStatsAggregator();
+ StatsAggregator sa = factory.getStatsAggregator();
assertNotNull(sa);
- assertTrue(sa.connect(conf));
+ assertTrue(sa.connect(conf, null));
assertTrue(sa.cleanUp("file_0"));
assertTrue(sa.closeConnection());
}
@@ -82,9 +84,9 @@ public class TestStatsPublisherEnhanced
assertTrue(statsPublisher.connect(conf));
// instantiate stats aggregator
- StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
+ StatsAggregator statsAggregator = factory.getStatsAggregator();
assertNotNull(statsAggregator);
- assertTrue(statsAggregator.connect(conf));
+ assertTrue(statsAggregator.connect(conf, null));
// publish stats
fillStatMap("200", "1000");
@@ -130,9 +132,9 @@ public class TestStatsPublisherEnhanced
assertTrue(statsPublisher.connect(conf));
// instantiate stats aggregator
- StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
+ StatsAggregator statsAggregator = factory.getStatsAggregator();
assertNotNull(statsAggregator);
- assertTrue(statsAggregator.connect(conf));
+ assertTrue(statsAggregator.connect(conf, null));
// statsAggregator.cleanUp("file_0000");
// assertTrue(statsAggregator.connect(conf));
@@ -191,9 +193,9 @@ public class TestStatsPublisherEnhanced
assertTrue(statsPublisher.connect(conf));
// instantiate stats aggregator
- StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
+ StatsAggregator statsAggregator = factory.getStatsAggregator();
assertNotNull(statsAggregator);
- assertTrue(statsAggregator.connect(conf));
+ assertTrue(statsAggregator.connect(conf, null));
// publish stats
fillStatMap("200", "1000");
@@ -256,9 +258,9 @@ public class TestStatsPublisherEnhanced
assertTrue(statsPublisher.connect(conf));
// instantiate stats aggregator
- StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
+ StatsAggregator statsAggregator = factory.getStatsAggregator();
assertNotNull(statsAggregator);
- assertTrue(statsAggregator.connect(conf));
+ assertTrue(statsAggregator.connect(conf, null));
// publish stats
fillStatMap("200", "");
@@ -327,9 +329,9 @@ public class TestStatsPublisherEnhanced
assertTrue(statsPublisher.connect(conf));
// instantiate stats aggregator
- StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
+ StatsAggregator statsAggregator = factory.getStatsAggregator();
assertNotNull(statsAggregator);
- assertTrue(statsAggregator.connect(conf));
+ assertTrue(statsAggregator.connect(conf, null));
// publish stats
fillStatMap("200", "1000");