You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/09/10 22:57:57 UTC
svn commit: r1624127 - in
/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql:
optimizer/optiq/translator/ parse/
Author: hashutosh
Date: Wed Sep 10 20:57:57 2014
New Revision: 1624127
URL: http://svn.apache.org/r1624127
Log:
HIVE-8037 : CBO: Refactor Join condn gen code, loosen restrictions on Join Conditions (John Pullokkaran via Ashutosh Chauhan)
Added:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java
Removed:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondnTypeCheckProcFactory.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinTypeCheckCtx.java
Modified:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java?rev=1624127&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java (added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java Wed Sep 10 20:57:57 2014
@@ -0,0 +1,316 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckCtx;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+
+/**
+ * JoinCondTypeCheckProcFactory is used by the Optiq planner (CBO) to generate join conditions
+ * from the join condition AST.<br>
+ * Reasons for the subclass:<br>
+ * 1. Additional restrictions on what is supported in join conditions<br>
+ * 2. Column handling is different<br>
+ * 3. A join condition expression has two input RowResolvers (RR) as opposed to one.<br>
+ * <br>
+ * TODO:<br>
+ * 1. Could we use a combined RR instead of a list of RRs?<br>
+ * 2. Use column processing from TypeCheckProcFactory<br>
+ * 3. Why not use GB expr?
+ */
+public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory {
+
+ /**
+  * Type checks a join condition AST using the join-specific processors of this factory.
+  *
+  * @param expr
+  *          root of the join condition AST
+  * @param tcCtx
+  *          type check context handed through to the walker
+  * @return whatever the three-argument {@code TypeCheckProcFactory.genExprNode} returns for the
+  *         given expression, context and a fresh {@code JoinCondTypeCheckProcFactory}
+  * @throws SemanticException
+  *           propagated from the underlying type check
+  */
+ public static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr, TypeCheckCtx tcCtx)
+     throws SemanticException {
+   TypeCheckProcFactory joinCondFactory = new JoinCondTypeCheckProcFactory();
+   return TypeCheckProcFactory.genExprNode(expr, tcCtx, joinCondFactory);
+ }
+
+ /**
+ * Processor for table columns.
+ */
+ public static class JoinCondColumnExprProcessor extends ColumnExprProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx;
+ if (ctx.getError() != null) {
+ return null;
+ }
+
+ ASTNode expr = (ASTNode) nd;
+ ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null;
+
+ if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) {
+ ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr);
+ return null;
+ }
+
+ assert (expr.getChildCount() == 1);
+ String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
+
+ boolean qualifiedAccess = (parent != null && parent.getType() == HiveParser.DOT);
+
+ ColumnInfo colInfo = null;
+ if (!qualifiedAccess) {
+ colInfo = getColInfo(ctx, null, tableOrCol, expr);
+ // It's a column.
+ return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
+ colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+ } else if (hasTableAlias(ctx, tableOrCol, expr)) {
+ return null;
+ } else {
+ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(expr));
+ }
+ }
+
+ private static boolean hasTableAlias(JoinTypeCheckCtx ctx, String tabName, ASTNode expr)
+ throws SemanticException {
+ int tblAliasCnt = 0;
+ for (RowResolver rr : ctx.getInputRRList()) {
+ if (rr.hasTableAlias(tabName))
+ tblAliasCnt++;
+ }
+
+ if (tblAliasCnt > 1) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ }
+
+ return (tblAliasCnt == 1) ? true : false;
+ }
+
+ private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias,
+ ASTNode expr) throws SemanticException {
+ ColumnInfo tmp;
+ ColumnInfo cInfoToRet = null;
+
+ for (RowResolver rr : ctx.getInputRRList()) {
+ tmp = rr.get(tabName, colAlias);
+ if (tmp != null) {
+ if (cInfoToRet != null) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ }
+ cInfoToRet = tmp;
+ }
+ }
+
+ return cInfoToRet;
+ }
+ }
+
+ /**
+ * Factory method to get ColumnExprProcessor.
+ *
+ * @return ColumnExprProcessor.
+ */
+ @Override
+ public ColumnExprProcessor getColumnExprProcessor() {
+ return new JoinCondColumnExprProcessor();
+ }
+
+ /**
+  * The default processor for typechecking join conditions. Column lookups consult every input
+  * RowResolver of the {@link JoinTypeCheckCtx}, and UDF validation adds the join specific
+  * restrictions implemented in {@link #validateUDF}.
+  */
+ public static class JoinCondDefaultExprProcessor extends DefaultExprProcessor {
+
+   /**
+    * Collects the referenceable column aliases of all join inputs, in input order.
+    */
+   @Override
+   protected List<String> getReferenceableColumnAliases(TypeCheckCtx ctx) {
+     List<String> aliases = new ArrayList<String>();
+     for (RowResolver inputRR : ((JoinTypeCheckCtx) ctx).getInputRRList()) {
+       aliases.addAll(inputRR.getReferenceableColumnAliases(null, -1));
+     }
+     return aliases;
+   }
+
+   /**
+    * Resolves a qualified column reference against the join inputs.
+    *
+    * @return the column descriptor, or {@code null} after recording an INVALID_COLUMN error on
+    *         the context when no input resolves the column
+    * @throws SemanticException
+    *           if more than one input resolves the column
+    */
+   @Override
+   protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
+       Object... nodeOutputs) throws SemanticException {
+     String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0)
+         .getText());
+     // NOTE: tableAlias must be a valid non-ambiguous table alias,
+     // because we've checked that in TOK_TABLE_OR_COL's process method.
+     JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx;
+     String colAlias = ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString();
+     ColumnInfo resolved = getColInfo(jCtx, tableAlias, colAlias, expr);
+
+     if (resolved != null) {
+       return new ExprNodeColumnDesc(resolved.getType(), resolved.getInternalName(),
+           tableAlias, resolved.getIsVirtualCol());
+     }
+
+     ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
+     return null;
+   }
+
+   /**
+    * Looks the column up in every join input.
+    *
+    * @return the single match, or {@code null} when no input has the column
+    * @throws SemanticException
+    *           if a second input also resolves the column
+    */
+   private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias,
+       ASTNode expr) throws SemanticException {
+     ColumnInfo match = null;
+
+     for (RowResolver inputRR : ctx.getInputRRList()) {
+       ColumnInfo candidate = inputRR.get(tabName, colAlias);
+       if (candidate == null) {
+         continue;
+       }
+       if (match != null) {
+         throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+       }
+       match = candidate;
+     }
+
+     return match;
+   }
+
+   /**
+    * Runs the inherited validation and then enforces the join condition rules: no
+    * disjunctions; a non-comparison function other than 'and' may not take columns from both
+    * inputs; and in a two-argument comparison without a constant operand, neither operand may
+    * on its own take columns from both inputs.
+    *
+    * @throws SemanticException
+    *           if the inherited validation fails or one of the rules above is violated
+    */
+   @Override
+   protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi,
+       List<ExprNodeDesc> children, GenericUDF genericUDF) throws SemanticException {
+     super.validateUDF(expr, isFunction, ctx, fi, children, genericUDF);
+
+     JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx;
+
+     // Join Condition can not contain disjunctions
+     if (genericUDF instanceof GenericUDFOPOr) {
+       throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(expr));
+     }
+
+     // Conjunctions carry no further restriction of their own.
+     if (genericUDF instanceof GenericUDFOPAnd) {
+       return;
+     }
+
+     boolean spansBothInputs;
+     if (genericUDF instanceof GenericUDFBaseCompare) {
+       // Comparisons of non literals LHS/RHS can not refer to inputs from
+       // both sides
+       boolean twoNonConstantOperands = children.size() == 2
+           && !(children.get(0) instanceof ExprNodeConstantDesc)
+           && !(children.get(1) instanceof ExprNodeConstantDesc);
+       spansBothInputs = twoNonConstantOperands
+           && comparisonUDFargsRefersToBothInput((GenericUDFBaseCompare) genericUDF, children,
+               jCtx.getInputRRList());
+     } else {
+       // Non Comparison UDF other than 'and' can not use inputs from both side
+       spansBothInputs = genericUDFargsRefersToBothInput(genericUDF, children,
+           jCtx.getInputRRList());
+     }
+
+     if (spansBothInputs) {
+       throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+     }
+   }
+
+   /**
+    * Tells whether the columns used by all arguments together come from more than one input.
+    */
+   private static boolean genericUDFargsRefersToBothInput(GenericUDF udf,
+       List<ExprNodeDesc> children, List<RowResolver> inputRRList) {
+     Map<Integer, ExprNodeDesc> colDescsByHash = new HashMap<Integer, ExprNodeDesc>();
+     for (ExprNodeDesc arg : children) {
+       ExprNodeDescUtils.getExprNodeColumnDesc(arg, colDescsByHash);
+     }
+
+     return getInputRef(colDescsByHash.values(), inputRRList).size() > 1;
+   }
+
+   /**
+    * Tells whether either operand of the comparison, taken on its own, uses columns from more
+    * than one input.
+    */
+   private static boolean comparisonUDFargsRefersToBothInput(GenericUDFBaseCompare comparisonUDF,
+       List<ExprNodeDesc> children, List<RowResolver> inputRRList) {
+     Map<Integer, ExprNodeDesc> lhsCols = new HashMap<Integer, ExprNodeDesc>();
+     Map<Integer, ExprNodeDesc> rhsCols = new HashMap<Integer, ExprNodeDesc>();
+     ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsCols);
+     ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsCols);
+
+     int lhsInputCount = getInputRef(lhsCols.values(), inputRRList).size();
+     int rhsInputCount = getInputRef(rhsCols.values(), inputRRList).size();
+
+     return lhsInputCount > 1 || rhsInputCount > 1;
+   }
+
+   /**
+    * Computes the positions, within {@code inputRRList}, of the inputs that the given column
+    * descriptors refer to.
+    */
+   private static Set<Integer> getInputRef(Collection<ExprNodeDesc> colDescSet,
+       List<RowResolver> inputRRList) {
+     Set<Integer> referencedInputs = new HashSet<Integer>();
+
+     for (ExprNodeDesc desc : colDescSet) {
+       ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) desc;
+       String tableAlias = colDesc.getTabAlias();
+
+       for (int pos = 0; pos < inputRRList.size(); pos++) {
+         RowResolver inputRR = inputRRList.get(pos);
+
+         // With a table alias, only an input that knows the alias can own the column;
+         // without one, every input is checked by internal name.
+         if (tableAlias != null && !inputRR.hasTableAlias(tableAlias)) {
+           continue;
+         }
+         if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) {
+           referencedInputs.add(pos);
+         }
+       }
+     }
+
+     return referencedInputs;
+   }
+ }
+
+ /**
+ * Factory method to get DefaultExprProcessor.
+ *
+ * @return DefaultExprProcessor.
+ */
+ @Override
+ public DefaultExprProcessor getDefaultExprProcessor() {
+ return new JoinCondDefaultExprProcessor();
+ }
+}
Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java?rev=1624127&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java (added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinTypeCheckCtx.java Wed Sep 10 20:57:57 2014
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.parse.JoinType;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckCtx;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * JoinTypeCheckCtx is used by the Optiq planner (CBO) to generate join conditions from the
+ * join condition AST.<br>
+ * Reasons for the subclass:<br>
+ * 1. Join conditions can not handle:<br>
+ * a. Stateful functions<br>
+ * b. Distinct<br>
+ * c. '*' expr<br>
+ * d. '.*' expr<br>
+ * e. Windowing expr<br>
+ * f. Complex type member access<br>
+ * g. Array index access<br>
+ * h. Sub query<br>
+ * i. GB expr elimination<br>
+ * 2. A join condition expression has two input RowResolvers (RR) as opposed to one.<br>
+ * <br>
+ * TODO:<br>
+ * 1. Could we use a combined RR instead of a list of RRs?<br>
+ * 2. Why not use GB expr?
+ */
+public class JoinTypeCheckCtx extends TypeCheckCtx {
+  /** Left and right input RowResolvers, in that order. */
+  private final ImmutableList<RowResolver> inputRRLst;
+  /** True for left, right and full outer joins. */
+  private final boolean outerJoin;
+
+  /**
+   * Builds a context over the combined RowResolver of both join inputs. Every allow* switch of
+   * the parent context is passed as {@code false}.
+   *
+   * @param leftRR
+   *          RowResolver of the left join input
+   * @param rightRR
+   *          RowResolver of the right join input
+   * @param hiveJoinType
+   *          kind of join the condition belongs to
+   * @throws SemanticException
+   *           propagated from combining the two RowResolvers
+   */
+  public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType)
+      throws SemanticException {
+    super(RowResolver.getCombinedRR(leftRR, rightRR), false, false, false, false, false, false,
+        false, false, false);
+    this.inputRRLst = ImmutableList.of(leftRR, rightRR);
+    this.outerJoin = isOuterJoinType(hiveJoinType);
+  }
+
+  /**
+   * Tells whether the join type is one of LEFTOUTER, RIGHTOUTER or FULLOUTER.
+   */
+  private static boolean isOuterJoinType(JoinType joinType) {
+    if (joinType == JoinType.LEFTOUTER) {
+      return true;
+    }
+    if (joinType == JoinType.RIGHTOUTER) {
+      return true;
+    }
+    return joinType == JoinType.FULLOUTER;
+  }
+
+  /**
+   * @return the inputRR List (left input first, then right input)
+   */
+  public List<RowResolver> getInputRRList() {
+    return inputRRLst;
+  }
+
+  /**
+   * @return true if this context was created for a left, right or full outer join
+   */
+  public boolean isOuterJoin() {
+    return outerJoin;
+  }
+}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1624127&r1=1624126&r2=1624127&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Sep 10 20:57:57 2014
@@ -51,7 +51,6 @@ import org.apache.hadoop.fs.FSDataOutput
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
@@ -127,6 +126,8 @@ import org.apache.hadoop.hive.ql.optimiz
import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule;
import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter;
@@ -12166,8 +12167,12 @@ public class SemanticAnalyzer extends Ba
// 2. Construct ExpressionNodeDesc representing Join Condition
RexNode optiqJoinCond = null;
if (joinCond != null) {
- Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondnTypeCheckProcFactory.genExprNode(joinCond,
- new JoinTypeCheckCtx(leftRR, rightRR));
+ JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
+ Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
+ jCtx);
+ if (jCtx.getError() != null)
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(),
+ jCtx.getError()));
ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond);
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java?rev=1624127&r1=1624126&r2=1624127&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java Wed Sep 10 20:57:57 2014
@@ -54,6 +54,22 @@ public class TypeCheckCtx implements Nod
private boolean allowDistinctFunctions;
+ private final boolean allowGBExprElimination;
+
+ private final boolean allowAllColRef;
+
+ private final boolean allowFunctionStar;
+
+ private final boolean allowWindowing;
+
+ // "." : FIELD Expression
+ private final boolean allowFieldExpr;
+
+ // "[]" : LSQUARE/INDEX Expression
+ private final boolean allowIndexExpr;
+
+ private final boolean allowSubQueryExpr;
+
/**
* Constructor.
*
@@ -61,10 +77,24 @@ public class TypeCheckCtx implements Nod
* The input row resolver of the previous operator.
*/
public TypeCheckCtx(RowResolver inputRR) {
+ this(inputRR, false, true, true, true, true, true, true, true, true);
+ }
+
+ public TypeCheckCtx(RowResolver inputRR, boolean allowStatefulFunctions,
+ boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef,
+ boolean allowFunctionStar, boolean allowWindowing, boolean allowFieldExpr,
+ boolean allowIndexExpr, boolean allowSubQueryExpr) {
setInputRR(inputRR);
error = null;
- allowStatefulFunctions = false;
- allowDistinctFunctions = true;
+ this.allowStatefulFunctions = allowStatefulFunctions;
+ this.allowDistinctFunctions = allowDistinctFunctions;
+ this.allowGBExprElimination = allowGBExprElimination;
+ this.allowAllColRef = allowAllColRef;
+ this.allowFunctionStar = allowFunctionStar;
+ this.allowWindowing = allowWindowing;
+ this.allowFieldExpr = allowFieldExpr;
+ this.allowIndexExpr = allowIndexExpr;
+ this.allowSubQueryExpr = allowSubQueryExpr;
}
/**
@@ -98,7 +128,8 @@ public class TypeCheckCtx implements Nod
}
/**
- * @param allowStatefulFunctions whether to allow stateful UDF invocations
+ * @param allowStatefulFunctions
+ * whether to allow stateful UDF invocations
*/
public void setAllowStatefulFunctions(boolean allowStatefulFunctions) {
this.allowStatefulFunctions = allowStatefulFunctions;
@@ -114,7 +145,7 @@ public class TypeCheckCtx implements Nod
/**
* @param error
* the error to set
- *
+ *
*/
public void setError(String error, ASTNode errorSrcNode) {
this.error = error;
@@ -136,7 +167,35 @@ public class TypeCheckCtx implements Nod
this.allowDistinctFunctions = allowDistinctFunctions;
}
- public boolean isAllowDistinctFunctions() {
+ public boolean getAllowDistinctFunctions() {
return allowDistinctFunctions;
}
+
+ public boolean getAllowGBExprElimination() {
+ return allowGBExprElimination;
+ }
+
+ public boolean getallowAllColRef() {
+ return allowAllColRef;
+ }
+
+ public boolean getallowFunctionStar() {
+ return allowFunctionStar;
+ }
+
+ public boolean getallowWindowing() {
+ return allowWindowing;
+ }
+
+ public boolean getallowFieldExpr() {
+ return allowFieldExpr;
+ }
+
+ public boolean getallowIndexExpr() {
+ return allowIndexExpr;
+ }
+
+ public boolean getallowSubQueryExpr() {
+ return allowSubQueryExpr;
+ }
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1624127&r1=1624126&r2=1624127&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Wed Sep 10 20:57:57 2014
@@ -80,12 +80,12 @@ import org.apache.hadoop.hive.serde2.typ
* expression Node Descriptor trees. They also introduce the correct conversion
* functions to do proper implicit conversion.
*/
-public final class TypeCheckProcFactory {
+public class TypeCheckProcFactory {
protected static final Log LOG = LogFactory.getLog(TypeCheckProcFactory.class
.getName());
- private TypeCheckProcFactory() {
+ protected TypeCheckProcFactory() {
// prevent instantiation
}
@@ -118,7 +118,7 @@ public final class TypeCheckProcFactory
RowResolver input = ctx.getInputRR();
ExprNodeDesc desc = null;
- if ((ctx == null) || (input == null)) {
+ if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) {
return null;
}
@@ -137,8 +137,13 @@ public final class TypeCheckProcFactory
return desc;
}
- public static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr,
- TypeCheckCtx tcCtx) throws SemanticException {
+ public static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr, TypeCheckCtx tcCtx)
+ throws SemanticException {
+ return genExprNode(expr, tcCtx, new TypeCheckProcFactory());
+ }
+
+ protected static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr,
+ TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException {
// Create the walker, the rules dispatcher and the context.
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack. The dispatcher
@@ -146,13 +151,13 @@ public final class TypeCheckProcFactory
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"),
- getNullExprProcessor());
+ tf.getNullExprProcessor());
opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" +
HiveParser.TinyintLiteral + "%|" +
HiveParser.SmallintLiteral + "%|" +
HiveParser.BigintLiteral + "%|" +
HiveParser.DecimalLiteral + "%"),
- getNumExprProcessor());
+ tf.getNumExprProcessor());
opRules
.put(new RuleRegExp("R3", HiveParser.Identifier + "%|"
+ HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|"
@@ -162,18 +167,18 @@ public final class TypeCheckProcFactory
+ HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|"
+ HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|"
+ HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"),
- getStrExprProcessor());
+ tf.getStrExprProcessor());
opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|"
- + HiveParser.KW_FALSE + "%"), getBoolExprProcessor());
- opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), getDateExprProcessor());
+ + HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor());
+ opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), tf.getDateExprProcessor());
opRules.put(new RuleRegExp("R6", HiveParser.TOK_TABLE_OR_COL + "%"),
- getColumnExprProcessor());
+ tf.getColumnExprProcessor());
opRules.put(new RuleRegExp("R7", HiveParser.TOK_SUBQUERY_OP + "%"),
- getSubQueryExprProcessor());
+ tf.getSubQueryExprProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
- Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
+ Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(),
opRules, tcCtx);
GraphWalker ogw = new DefaultGraphWalker(disp);
@@ -229,7 +234,7 @@ public final class TypeCheckProcFactory
*
* @return NullExprProcessor.
*/
- public static NullExprProcessor getNullExprProcessor() {
+ public NullExprProcessor getNullExprProcessor() {
return new NullExprProcessor();
}
@@ -304,7 +309,7 @@ public final class TypeCheckProcFactory
*
* @return NumExprProcessor.
*/
- public static NumExprProcessor getNumExprProcessor() {
+ public NumExprProcessor getNumExprProcessor() {
return new NumExprProcessor();
}
@@ -362,7 +367,7 @@ public final class TypeCheckProcFactory
*
* @return StrExprProcessor.
*/
- public static StrExprProcessor getStrExprProcessor() {
+ public StrExprProcessor getStrExprProcessor() {
return new StrExprProcessor();
}
@@ -408,7 +413,7 @@ public final class TypeCheckProcFactory
*
* @return BoolExprProcessor.
*/
- public static BoolExprProcessor getBoolExprProcessor() {
+ public BoolExprProcessor getBoolExprProcessor() {
return new BoolExprProcessor();
}
@@ -449,7 +454,7 @@ public final class TypeCheckProcFactory
*
* @return DateExprProcessor.
*/
- public static DateExprProcessor getDateExprProcessor() {
+ public DateExprProcessor getDateExprProcessor() {
return new DateExprProcessor();
}
@@ -546,7 +551,7 @@ public final class TypeCheckProcFactory
*
* @return ColumnExprProcessor.
*/
- public static ColumnExprProcessor getColumnExprProcessor() {
+ public ColumnExprProcessor getColumnExprProcessor() {
return new ColumnExprProcessor();
}
@@ -613,7 +618,7 @@ public final class TypeCheckProcFactory
windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC);
}
- private static boolean isRedundantConversionFunction(ASTNode expr,
+ protected static boolean isRedundantConversionFunction(ASTNode expr,
boolean isFunction, ArrayList<ExprNodeDesc> children) {
if (!isFunction) {
return false;
@@ -700,7 +705,30 @@ public final class TypeCheckProcFactory
return getFuncExprNodeDescWithUdfData(udfName, null, children);
}
- static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
+ /**
+  * Rejects functions that are not allowed at this position of an expression: table
+  * generating functions, aggregate functions, and stateful functions when the context does
+  * not allow them.
+  *
+  * @param expr
+  *          AST node of the function or operator call
+  * @param isFunction
+  *          selects which node a UDAF error is reported against: the first child of
+  *          {@code expr} when true, {@code expr} itself otherwise
+  * @param ctx
+  *          type check context, consulted for the stateful function switch
+  * @param fi
+  *          registry information of the function being validated
+  * @param children
+  *          argument descriptors; not used by this implementation
+  * @param genericUDF
+  *          the resolved UDF, may be null
+  * @throws SemanticException
+  *           if the function is a UDTF, a UDAF, or a disallowed stateful UDF
+  */
+ protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi,
+     List<ExprNodeDesc> children, GenericUDF genericUDF) throws SemanticException {
+   // Detect UDTFs in nested SELECT, GROUP BY, etc. as they aren't supported
+   if (fi.getGenericUDTF() != null) {
+     throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg());
+   }
+
+   // UDAF in filter condition, group-by clause, param of function, etc.
+   if (fi.getGenericUDAFResolver() != null) {
+     ASTNode offendingNode = isFunction ? (ASTNode) expr.getChild(0) : expr;
+     throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(offendingNode));
+   }
+
+   boolean statefulAllowed = ctx.getAllowStatefulFunctions();
+   if (!statefulAllowed && genericUDF != null && FunctionRegistry.isStateful(genericUDF)) {
+     throw new SemanticException(ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg());
+   }
+ }
+
+ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
boolean isFunction, ArrayList<ExprNodeDesc> children, TypeCheckCtx ctx)
throws SemanticException, UDFArgumentException {
// return the child directly if the conversion is redundant.
@@ -713,6 +741,9 @@ public final class TypeCheckProcFactory
ExprNodeDesc desc;
if (funcText.equals(".")) {
// "." : FIELD Expression
+ if (!ctx.getallowFieldExpr())
+ throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
+
assert (children.size() == 2);
// Only allow constant field name for now
assert (children.get(1) instanceof ExprNodeConstantDesc);
@@ -727,23 +758,22 @@ public final class TypeCheckProcFactory
// Allow accessing a field of list element structs directly from a list
boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST);
if (isList) {
- objectTypeInfo = ((ListTypeInfo) objectTypeInfo)
- .getListElementTypeInfo();
+ objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo();
}
if (objectTypeInfo.getCategory() != Category.STRUCT) {
throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr));
}
- TypeInfo t = ((StructTypeInfo) objectTypeInfo)
- .getStructFieldTypeInfo(fieldNameString);
+ TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString);
if (isList) {
t = TypeInfoFactory.getListTypeInfo(t);
}
- desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString,
- isList);
-
+ desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList);
} else if (funcText.equals("[")) {
// "[]" : LSQUARE/INDEX Expression
+ if (!ctx.getallowIndexExpr())
+ throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
+
assert (children.size() == 2);
// Check whether this is a list or a map
@@ -753,35 +783,29 @@ public final class TypeCheckProcFactory
// Only allow integer index for now
if (!(children.get(1) instanceof ExprNodeConstantDesc)
|| !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo()
- .equals(TypeInfoFactory.intTypeInfo))) {
- throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
- expr,
- ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg()));
+ .equals(TypeInfoFactory.intTypeInfo))) {
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+ ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg()));
}
// Calculate TypeInfo
TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo();
- desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
- .getGenericUDFForIndex(), children);
+ desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children);
} else if (myt.getCategory() == Category.MAP) {
// Only allow constant map key for now
if (!(children.get(1) instanceof ExprNodeConstantDesc)) {
- throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
- expr,
- ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg()));
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+ ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg()));
}
- if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo()
- .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) {
- throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE
- .getMsg(expr));
+ if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo().equals(((MapTypeInfo) myt)
+ .getMapKeyTypeInfo()))) {
+ throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(expr));
}
// Calculate TypeInfo
TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo();
- desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
- .getGenericUDFForIndex(), children);
+ desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children);
} else {
- throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr,
- myt.getTypeName()));
+ throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName()));
}
} else {
// other operators or functions
@@ -833,26 +857,7 @@ public final class TypeCheckProcFactory
}
}
- // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't
- // supported
- if (fi.getGenericUDTF() != null) {
- throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg());
- }
- // UDAF in filter condition, group-by caluse, param of funtion, etc.
- if (fi.getGenericUDAFResolver() != null) {
- if (isFunction) {
- throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.
- getMsg((ASTNode) expr.getChild(0)));
- } else {
- throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr));
- }
- }
- if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) {
- if (FunctionRegistry.isStateful(genericUDF)) {
- throw new SemanticException(
- ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg());
- }
- }
+ validateUDF(expr, isFunction, ctx, fi, children, genericUDF);
// Try to infer the type of the constant only if there are two
// nodes, one of them is column and the other is numeric const
@@ -963,6 +968,24 @@ public final class TypeCheckProcFactory
return false;
}
+ /**
+ * Resolves a table-qualified column reference against the input row resolver held by
+ * the type-check context. The visible caller invokes this for a DOT node whose first
+ * child is TOK_TABLE_OR_COL and whose first child output is null.
+ *
+ * @param ctx type-check context; supplies the input RowResolver and receives the error
+ * when the column is not found
+ * @param expr the node being processed; the text of child(0).child(0) is unescaped and
+ * used as the table alias, and child(1) is used in the error message
+ * @param nodeOutputs outputs of the child nodes; nodeOutputs[1] is cast to
+ * ExprNodeConstantDesc and the string form of its value is used as the column name
+ * @return a column descriptor built from the matching ColumnInfo, or null after an
+ * INVALID_COLUMN error has been recorded on ctx
+ * @throws SemanticException as declared; presumably raised by the row-resolver lookup
+ * (NOTE(review): confirm which callee declares it)
+ */
+ protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
+ Object... nodeOutputs) throws SemanticException {
+ RowResolver input = ctx.getInputRR();
+ String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0)
+ .getText());
+ // NOTE: tableAlias must be a valid non-ambiguous table alias,
+ // because we've checked that in TOK_TABLE_OR_COL's process method.
+ ColumnInfo colInfo = input.get(tableAlias, ((ExprNodeConstantDesc) nodeOutputs[1]).getValue()
+ .toString());
+
+ // Unknown column: record the error on the context instead of throwing, and hand
+ // null back to the caller.
+ if (colInfo == null) {
+ ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
+ return null;
+ }
+ return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
+ colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+ }
+
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
@@ -1012,7 +1035,11 @@ public final class TypeCheckProcFactory
* The difference is that there is translation for Window related tokens, so we just
* return null;
*/
- if ( windowingTokens.contains(expr.getType())) {
+ if (windowingTokens.contains(expr.getType())) {
+ if (!ctx.getallowWindowing())
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+ ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context")));
+
return null;
}
@@ -1021,6 +1048,11 @@ public final class TypeCheckProcFactory
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+ if (!ctx.getallowAllColRef())
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+ ErrorMsg.INVALID_COLUMN
+ .getMsg("All column reference is not supported in the context")));
+
RowResolver input = ctx.getInputRR();
ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
assert expr.getChildCount() <= 1;
@@ -1058,22 +1090,7 @@ public final class TypeCheckProcFactory
if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& nodeOutputs[0] == null) {
-
- RowResolver input = ctx.getInputRR();
- String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr
- .getChild(0).getChild(0).getText());
- // NOTE: tableAlias must be a valid non-ambiguous table alias,
- // because we've checked that in TOK_TABLE_OR_COL's process method.
- ColumnInfo colInfo = input.get(tableAlias,
- ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString());
-
- if (colInfo == null) {
- ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
- return null;
- }
- return new ExprNodeColumnDesc(colInfo.getType(), colInfo
- .getInternalName(), colInfo.getTabAlias(), colInfo
- .getIsVirtualCol());
+ return processQualifiedColRef(ctx, expr, nodeOutputs);
}
// Return nulls for conversion operators
@@ -1088,7 +1105,7 @@ public final class TypeCheckProcFactory
expr.getType() == HiveParser.TOK_FUNCTIONSTAR ||
expr.getType() == HiveParser.TOK_FUNCTIONDI);
- if (!ctx.isAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) {
+ if (!ctx.getAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) {
throw new SemanticException(
SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.DISTINCT_NOT_SUPPORTED.getMsg()));
}
@@ -1107,6 +1124,11 @@ public final class TypeCheckProcFactory
}
if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
+ if (!ctx.getallowFunctionStar())
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+ ErrorMsg.INVALID_COLUMN
+ .getMsg(".* reference is not supported in the context")));
+
RowResolver input = ctx.getInputRR();
for (ColumnInfo colInfo : input.getColumnInfos()) {
if (!colInfo.getIsVirtualCol()) {
@@ -1119,8 +1141,7 @@ public final class TypeCheckProcFactory
// If any of the children contains null, then return a null
// this is a hack for now to handle the group by case
if (children.contains(null)) {
- RowResolver input = ctx.getInputRR();
- List<String> possibleColumnNames = input.getReferenceableColumnAliases(null, -1);
+ List<String> possibleColumnNames = getReferenceableColumnAliases(ctx);
String reason = String.format("(possible column names are: %s)",
StringUtils.join(possibleColumnNames, ", "));
ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason),
@@ -1143,6 +1164,9 @@ public final class TypeCheckProcFactory
}
}
+ /**
+ * Returns the column aliases reported by the context's input RowResolver. The visible
+ * caller joins them into the "(possible column names are: ...)" hint attached to an
+ * INVALID_COLUMN error.
+ *
+ * @param ctx type-check context whose input RowResolver is queried
+ * @return the list produced by RowResolver.getReferenceableColumnAliases(null, -1)
+ */
+ protected List<String> getReferenceableColumnAliases(TypeCheckCtx ctx) {
+ // NOTE(review): the meaning of the (null, -1) arguments is defined by RowResolver;
+ // presumably "no table alias filter, no limit" -- confirm against that class.
+ return ctx.getInputRR().getReferenceableColumnAliases(null, -1);
+ }
}
/**
@@ -1150,7 +1174,7 @@ public final class TypeCheckProcFactory
*
* @return DefaultExprProcessor.
*/
- public static DefaultExprProcessor getDefaultExprProcessor() {
+ public DefaultExprProcessor getDefaultExprProcessor() {
return new DefaultExprProcessor();
}
@@ -1168,13 +1192,18 @@ public final class TypeCheckProcFactory
return null;
}
+ ASTNode expr = (ASTNode) nd;
+ ASTNode sqNode = (ASTNode) expr.getParent().getChild(1);
+
+ if (!ctx.getallowSubQueryExpr())
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sqNode,
+ ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg()));
+
ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
if (desc != null) {
return desc;
}
- ASTNode expr = (ASTNode) nd;
- ASTNode sqNode = (ASTNode) expr.getParent().getChild(1);
/*
* Restriction.1.h :: SubQueries only supported in the SQL Where Clause.
*/
@@ -1190,7 +1219,7 @@ public final class TypeCheckProcFactory
*
* @return DateExprProcessor.
*/
- public static SubQueryExprProcessor getSubQueryExprProcessor() {
+ public SubQueryExprProcessor getSubQueryExprProcessor() {
return new SubQueryExprProcessor();
}
}