You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2010/01/21 11:38:15 UTC
svn commit: r901644 [20/37] - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/
ql/src/java/org/apache/hadoop/hive/ql/history/ ql/src/jav...
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Jan 21 10:37:58 2010
@@ -79,7 +79,6 @@
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
@@ -93,7 +92,6 @@
import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
-import org.apache.hadoop.hive.ql.optimizer.physical.GenMRSkewJoinProcessor;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -113,7 +111,6 @@
import org.apache.hadoop.hive.ql.plan.fetchWork;
import org.apache.hadoop.hive.ql.plan.fileSinkDesc;
import org.apache.hadoop.hive.ql.plan.filterDesc;
-import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.plan.forwardDesc;
import org.apache.hadoop.hive.ql.plan.groupByDesc;
import org.apache.hadoop.hive.ql.plan.joinDesc;
@@ -131,6 +128,7 @@
import org.apache.hadoop.hive.ql.plan.tableScanDesc;
import org.apache.hadoop.hive.ql.plan.udtfDesc;
import org.apache.hadoop.hive.ql.plan.unionDesc;
+import org.apache.hadoop.hive.ql.plan.filterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
@@ -164,7 +162,7 @@
private List<loadTableDesc> loadTableWork;
private List<loadFileDesc> loadFileWork;
private Map<JoinOperator, QBJoinTree> joinContext;
- private HashMap<TableScanOperator, Table> topToTable;
+ private final HashMap<TableScanOperator, Table> topToTable;
private QB qb;
private ASTNode ast;
private int destTableId;
@@ -175,7 +173,7 @@
Map<String, PrunedPartitionList> prunedPartitions;
private createViewDesc createVwDesc;
private ASTNode viewSelect;
- private UnparseTranslator unparseTranslator;
+ private final UnparseTranslator unparseTranslator;
private static class Phase1Ctx {
String dest;
@@ -186,39 +184,39 @@
super(conf);
- this.opToPartPruner = new HashMap<TableScanOperator, exprNodeDesc>();
- this.opToSamplePruner = new HashMap<TableScanOperator, sampleDesc>();
- this.topOps = new HashMap<String, Operator<? extends Serializable>>();
- this.topSelOps = new HashMap<String, Operator<? extends Serializable>>();
- this.loadTableWork = new ArrayList<loadTableDesc>();
- this.loadFileWork = new ArrayList<loadFileDesc>();
+ opToPartPruner = new HashMap<TableScanOperator, exprNodeDesc>();
+ opToSamplePruner = new HashMap<TableScanOperator, sampleDesc>();
+ topOps = new HashMap<String, Operator<? extends Serializable>>();
+ topSelOps = new HashMap<String, Operator<? extends Serializable>>();
+ loadTableWork = new ArrayList<loadTableDesc>();
+ loadFileWork = new ArrayList<loadFileDesc>();
opParseCtx = new LinkedHashMap<Operator<? extends Serializable>, OpParseContext>();
joinContext = new HashMap<JoinOperator, QBJoinTree>();
topToTable = new HashMap<TableScanOperator, Table>();
- this.destTableId = 1;
- this.uCtx = null;
- this.listMapJoinOpsNoReducer = new ArrayList<MapJoinOperator>();
- this.groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
- prunedPartitions = new HashMap<String, PrunedPartitionList> ();
+ destTableId = 1;
+ uCtx = null;
+ listMapJoinOpsNoReducer = new ArrayList<MapJoinOperator>();
+ groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
+ prunedPartitions = new HashMap<String, PrunedPartitionList>();
unparseTranslator = new UnparseTranslator();
}
@Override
protected void reset() {
super.reset();
- this.loadTableWork.clear();
- this.loadFileWork.clear();
- this.topOps.clear();
- this.topSelOps.clear();
- this.destTableId = 1;
- this.idToTableNameMap.clear();
+ loadTableWork.clear();
+ loadFileWork.clear();
+ topOps.clear();
+ topSelOps.clear();
+ destTableId = 1;
+ idToTableNameMap.clear();
qb = null;
ast = null;
uCtx = null;
- this.joinContext.clear();
- this.opParseCtx.clear();
- this.groupOpToInputTables.clear();
- this.prunedPartitions.clear();
+ joinContext.clear();
+ opParseCtx.clear();
+ groupOpToInputTables.clear();
+ prunedPartitions.clear();
}
public void init(ParseContext pctx) {
@@ -233,24 +231,23 @@
ctx = pctx.getContext();
destTableId = pctx.getDestTableId();
idToTableNameMap = pctx.getIdToTableNameMap();
- this.uCtx = pctx.getUCtx();
- this.listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
+ uCtx = pctx.getUCtx();
+ listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
qb = pctx.getQB();
- this.groupOpToInputTables = pctx.getGroupOpToInputTables();
- this.prunedPartitions = pctx.getPrunedPartitions();
+ groupOpToInputTables = pctx.getGroupOpToInputTables();
+ prunedPartitions = pctx.getPrunedPartitions();
}
public ParseContext getParseContext() {
- return new ParseContext(conf, qb, ast, opToPartPruner, topOps,
- topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
- loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
- listMapJoinOpsNoReducer,
- groupOpToInputTables, prunedPartitions, opToSamplePruner);
+ return new ParseContext(conf, qb, ast, opToPartPruner, topOps, topSelOps,
+ opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork, ctx,
+ idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer,
+ groupOpToInputTables, prunedPartitions, opToSamplePruner);
}
@SuppressWarnings("nls")
- public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id,
- String alias) throws SemanticException {
+ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
+ throws SemanticException {
assert (ast.getToken() != null);
switch (ast.getToken().getType()) {
@@ -296,7 +293,7 @@
/**
* DFS-scan the expressionTree to find all aggregation subtrees and put them
* in aggregations.
- *
+ *
* @param expressionTree
* @param aggregations
* the key to the HashTable is the toStringTree() representation of
@@ -308,13 +305,14 @@
|| expressionTree.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
assert (expressionTree.getChildCount() != 0);
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
- String functionName = unescapeIdentifier(expressionTree.getChild(0).getText());
+ String functionName = unescapeIdentifier(expressionTree.getChild(0)
+ .getText());
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
aggregations.put(expressionTree.toStringTree(), expressionTree);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
if (!fi.isNative()) {
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) expressionTree.getChild(0));
+ unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
+ .getChild(0));
}
return;
}
@@ -336,7 +334,8 @@
if (expr == null) {
expr = value;
} else {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg());
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS
+ .getMsg());
}
}
}
@@ -347,7 +346,7 @@
* Goes though the tabref tree and finds the alias for the table. Once found,
* it records the table name-> alias association in aliasToTabs. It also makes
* an association from the alias to the table AST in parse info.
- *
+ *
* @return the alias of the table
*/
private String processTable(QB qb, ASTNode tabref) throws SemanticException {
@@ -360,47 +359,48 @@
// tablename tablesample
// OR
// tablename alias
- ASTNode ct = (ASTNode)tabref.getChild(1);
+ ASTNode ct = (ASTNode) tabref.getChild(1);
if (ct.getToken().getType() == HiveParser.TOK_TABLESAMPLE) {
tableSamplePresent = true;
- }
- else {
+ } else {
aliasIndex = 1;
}
- }
- else if (tabref.getChildCount() == 3) {
+ } else if (tabref.getChildCount() == 3) {
// table name table sample alias
aliasIndex = 2;
tableSamplePresent = true;
}
- ASTNode tableTree = (ASTNode)(tabref.getChild(0));
+ ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref.getChild(aliasIndex)));
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
+ .getChild(aliasIndex)));
}
if (tableSamplePresent) {
- ASTNode sampleClause = (ASTNode)tabref.getChild(1);
+ ASTNode sampleClause = (ASTNode) tabref.getChild(1);
ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
if (sampleClause.getChildCount() > 2) {
for (int i = 2; i < sampleClause.getChildCount(); i++) {
- sampleCols.add((ASTNode)sampleClause.getChild(i));
+ sampleCols.add((ASTNode) sampleClause.getChild(i));
}
}
// TODO: For now only support sampling on up to two columns
// Need to change it to list of columns
if (sampleCols.size() > 2) {
- throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref.getChild(0)));
+ throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref
+ .getChild(0)));
}
- qb.getParseInfo().setTabSample(alias, new TableSample(
- unescapeIdentifier(sampleClause.getChild(0).getText()),
- unescapeIdentifier(sampleClause.getChild(1).getText()),
- sampleCols)
- );
+ qb.getParseInfo().setTabSample(
+ alias,
+ new TableSample(
+ unescapeIdentifier(sampleClause.getChild(0).getText()),
+ unescapeIdentifier(sampleClause.getChild(1).getText()),
+ sampleCols));
if (unparseTranslator.isEnabled()) {
for (ASTNode sampleCol : sampleCols) {
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) sampleCol.getChild(0));
+ unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
+ .getChild(0));
}
}
}
@@ -412,10 +412,10 @@
unparseTranslator.addIdentifierTranslation(tableTree);
if (aliasIndex != 0) {
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) tabref.getChild(aliasIndex));
+ unparseTranslator.addIdentifierTranslation((ASTNode) tabref
+ .getChild(aliasIndex));
}
-
+
return alias;
}
@@ -435,26 +435,26 @@
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq.getChild(1)));
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
+ .getChild(1)));
}
// Insert this map into the stats
qb.setSubqAlias(alias, qbexpr);
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) subq.getChild(1));
+ unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
return alias;
}
- private boolean isJoinToken(ASTNode node)
- {
- if ((node.getToken().getType() == HiveParser.TOK_JOIN) ||
- (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) ||
- (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) ||
- (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) ||
- (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) ||
- (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN))
+ private boolean isJoinToken(ASTNode node) {
+ if ((node.getToken().getType() == HiveParser.TOK_JOIN)
+ || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
+ }
return false;
}
@@ -462,7 +462,7 @@
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
- *
+ *
* @param qb
* @param join
* @throws SemanticException
@@ -471,8 +471,9 @@
private void processJoin(QB qb, ASTNode join) throws SemanticException {
int numChildren = join.getChildCount();
if ((numChildren != 2) && (numChildren != 3)
- && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN)
+ && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
throw new SemanticException("Join with multiple children");
+ }
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
@@ -485,8 +486,8 @@
// is not supported. Instead, the lateral view must be in a subquery
// SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
// JOIN src2 ...
- throw new
- SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN.getMsg(join));
+ throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
+ .getMsg(join));
} else if (isJoinToken(child)) {
processJoin(qb, child);
}
@@ -497,7 +498,7 @@
* Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
* table or subquery in the lateral view and also make a mapping from the
* alias to all the lateral view AST's
- *
+ *
* @param qb
* @param lateralView
* @return the alias for the table/subquery
@@ -505,15 +506,15 @@
*/
private String processLateralView(QB qb, ASTNode lateralView)
- throws SemanticException {
+ throws SemanticException {
int numChildren = lateralView.getChildCount();
- assert(numChildren == 2);
+ assert (numChildren == 2);
ASTNode next = (ASTNode) lateralView.getChild(1);
String alias = null;
- switch(next.getToken().getType()) {
+ switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
alias = processTable(qb, next);
break;
@@ -524,8 +525,8 @@
alias = processLateralView(qb, next);
break;
default:
- throw new SemanticException(
- ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(lateralView));
+ throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
+ .getMsg(lateralView));
}
qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
return alias;
@@ -533,23 +534,21 @@
/**
* Phase 1: (including, but not limited to):
- *
+ *
* 1. Gets all the aliases for all the tables / subqueries and makes the
- * appropriate mapping in aliasToTabs, aliasToSubq
- * 2. Gets the location of the destination and names the clase "inclause" + i
- * 3. Creates a map from a string representation of an aggregation tree to the
- * actual aggregation AST
+ * appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the
+ * destination and names the clause "inclause" + i 3. Creates a map from a
+ * string representation of an aggregation tree to the actual aggregation AST
* 4. Creates a mapping from the clause name to the select expression AST in
- * destToSelExpr
- * 5. Creates a mapping from a table alias to the lateral view AST's in
- * aliasToLateralViews
- *
+ * destToSelExpr 5. Creates a mapping from a table alias to the lateral view
+ * AST's in aliasToLateralViews
+ *
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
- @SuppressWarnings({"fallthrough", "nls"})
+ @SuppressWarnings( { "fallthrough", "nls" })
public void doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1)
throws SemanticException {
@@ -566,8 +565,9 @@
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
- if (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST)
- qbp.setHints((ASTNode)ast.getChild(0));
+ if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
+ qbp.setHints((ASTNode) ast.getChild(0));
+ }
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
@@ -585,10 +585,12 @@
// is there a insert in the subquery
if (qbp.getIsSubQ()) {
- ASTNode ch = (ASTNode)ast.getChild(0);
- if ((ch.getToken().getType() != HiveParser.TOK_DIR) ||
- (((ASTNode)ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE))
- throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
+ ASTNode ch = (ASTNode) ast.getChild(0);
+ if ((ch.getToken().getType() != HiveParser.TOK_DIR)
+ || (((ASTNode) ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) {
+ throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY
+ .getMsg(ast));
+ }
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
@@ -596,8 +598,9 @@
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
- if (child_count != 1)
+ if (child_count != 1) {
throw new SemanticException("Multiple Children " + child_count);
+ }
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
@@ -620,14 +623,16 @@
break;
case HiveParser.TOK_DISTRIBUTEBY:
- // Get the distribute by aliases - these are aliased to the entries in the
+ // Get the distribute by aliases - these are aliased to the entries in
+ // the
// select list
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast));
- }
- else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
- throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast));
+ throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT
+ .getMsg(ast));
+ } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT
+ .getMsg(ast));
}
break;
@@ -636,10 +641,11 @@
// select list
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg(ast));
- }
- else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
- throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg(ast));
+ throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT
+ .getMsg(ast));
+ } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT
+ .getMsg(ast));
}
break;
@@ -649,7 +655,8 @@
// select list
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg(ast));
+ throw new SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT
+ .getMsg(ast));
}
break;
@@ -657,7 +664,8 @@
// Get the groupby aliases - these are aliased to the entries in the
// select list
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg(ast));
+ throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY
+ .getMsg(ast));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
@@ -668,10 +676,12 @@
break;
case HiveParser.TOK_UNION:
- // currently, we dont support subq1 union subq2 - the user has to explicitly say:
+ // currently, we dont support subq1 union subq2 - the user has to
+ // explicitly say:
// select * from (subq1 union subq2) subqalias
- if (!qbp.getIsSubQ())
+ if (!qbp.getIsSubQ()) {
throw new SemanticException(ErrorMsg.UNION_NOTIN_SUBQ.getMsg());
+ }
default:
skipRecursion = false;
@@ -713,18 +723,20 @@
String tab_name = qb.getTabNameForAlias(alias);
Table tab = null;
try {
- tab = this.db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tab_name);
- }
- catch (InvalidTableException ite) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb.getParseInfo().getSrcForAlias(alias)));
+ tab = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tab_name);
+ } catch (InvalidTableException ite) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb
+ .getParseInfo().getSrcForAlias(alias)));
}
if (tab.isView()) {
replaceViewReferenceWithDefinition(qb, tab, tab_name, alias);
continue;
}
- if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass()))
- throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg(qb.getParseInfo().getSrcForAlias(alias)));
+ if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
+ throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE
+ .getMsg(qb.getParseInfo().getSrcForAlias(alias)));
+ }
qb.getMetaData().setSrcForAlias(alias, tab);
}
@@ -745,15 +757,18 @@
ASTNode ast = qbp.getDestForClause(name);
switch (ast.getToken().getType()) {
case HiveParser.TOK_TAB: {
- tableSpec ts = new tableSpec(this.db, conf, ast);
+ tableSpec ts = new tableSpec(db, conf, ast);
if (ts.tableHandle.isView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
- if (!HiveOutputFormat.class.isAssignableFrom(ts.tableHandle.getOutputFormatClass()))
- throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(ast));
+ if (!HiveOutputFormat.class.isAssignableFrom(ts.tableHandle
+ .getOutputFormatClass())) {
+ throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
+ .getMsg(ast));
+ }
- if(ts.partSpec == null) {
+ if (ts.partSpec == null) {
// This is a table
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
} else {
@@ -763,49 +778,45 @@
break;
}
case HiveParser.TOK_LOCAL_DIR:
- case HiveParser.TOK_DIR:
- {
- // This is a dfs file
- String fname = stripQuotes(ast.getChild(0).getText());
- if ((!qb.getParseInfo().getIsSubQ()) &&
- (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE))
- {
- fname = ctx.getMRTmpFileURI();
- ctx.setResDir(new Path(fname));
-
- if ( qb.isCTAS() ) {
- qb.setIsQuery(false);
- } else {
- qb.setIsQuery(true);
- }
+ case HiveParser.TOK_DIR: {
+ // This is a dfs file
+ String fname = stripQuotes(ast.getChild(0).getText());
+ if ((!qb.getParseInfo().getIsSubQ())
+ && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
+ fname = ctx.getMRTmpFileURI();
+ ctx.setResDir(new Path(fname));
+
+ if (qb.isCTAS()) {
+ qb.setIsQuery(false);
+ } else {
+ qb.setIsQuery(true);
}
- qb.getMetaData().setDestForAlias(name, fname,
- (ast.getToken().getType() == HiveParser.TOK_DIR));
- break;
}
+ qb.getMetaData().setDestForAlias(name, fname,
+ (ast.getToken().getType() == HiveParser.TOK_DIR));
+ break;
+ }
default:
- throw new SemanticException("Unknown Token Type " + ast.getToken().getType());
+ throw new SemanticException("Unknown Token Type "
+ + ast.getToken().getType());
}
}
} catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils
+ // Has to use full name to make sure it does not conflict with
+ // org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
}
- private void replaceViewReferenceWithDefinition(
- QB qb, Table tab, String tab_name, String alias) throws SemanticException {
+ private void replaceViewReferenceWithDefinition(QB qb, Table tab,
+ String tab_name, String alias) throws SemanticException {
ParseDriver pd = new ParseDriver();
ASTNode viewTree;
- final ASTNodeOrigin viewOrigin =
- new ASTNodeOrigin(
- "VIEW",
- tab.getName(),
- tab.getViewExpandedText(),
- alias,
- qb.getParseInfo().getSrcForAlias(alias));
+ final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getName(),
+ tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
+ alias));
try {
String viewText = tab.getViewExpandedText();
// Reparse text, passing null for context to avoid clobbering
@@ -813,19 +824,17 @@
ASTNode tree = pd.parse(viewText, null);
tree = ParseUtils.findRootNonNullToken(tree);
viewTree = tree;
- Dispatcher nodeOriginDispatcher = new Dispatcher()
- {
- public Object dispatch(
- Node nd, java.util.Stack<Node> stack, Object... nodeOutputs)
- {
- ((ASTNode) nd).setOrigin(viewOrigin);
- return null;
- }
- };
- GraphWalker nodeOriginTagger =
- new DefaultGraphWalker(nodeOriginDispatcher);
- nodeOriginTagger.startWalking(
- java.util.Collections.<Node>singleton(viewTree), null);
+ Dispatcher nodeOriginDispatcher = new Dispatcher() {
+ public Object dispatch(Node nd, java.util.Stack<Node> stack,
+ Object... nodeOutputs) {
+ ((ASTNode) nd).setOrigin(viewOrigin);
+ return null;
+ }
+ };
+ GraphWalker nodeOriginTagger = new DefaultGraphWalker(
+ nodeOriginDispatcher);
+ nodeOriginTagger.startWalking(java.util.Collections
+ .<Node> singleton(viewTree), null);
} catch (ParseException e) {
// A user could encounter this if a stored view definition contains
// an old SQL construct which has been eliminated in a later Hive
@@ -843,43 +852,51 @@
}
private boolean isPresent(String[] list, String elem) {
- for (String s : list)
- if (s.equals(elem))
+ for (String s : list) {
+ if (s.equals(elem)) {
return true;
+ }
+ }
return false;
}
@SuppressWarnings("nls")
- private void parseJoinCondPopulateAlias(QBJoinTree joinTree,
- ASTNode condn, Vector<String> leftAliases, Vector<String> rightAliases,
- ArrayList<String> fields)
- throws SemanticException {
+ private void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
+ Vector<String> leftAliases, Vector<String> rightAliases,
+ ArrayList<String> fields) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
case HiveParser.TOK_TABLE_OR_COL:
- String tableOrCol = unescapeIdentifier(condn.getChild(0).getText().toLowerCase());
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) condn.getChild(0));
+ String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
+ .toLowerCase());
+ unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
- if (!leftAliases.contains(tableOrCol))
+ if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
+ }
} else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
- if (!rightAliases.contains(tableOrCol))
+ if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
+ }
} else {
- // We don't support columns without table prefix in JOIN condition right now.
- // We need to pass Metadata here to know which table the column belongs to.
- throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn.getChild(0)));
+ // We don't support columns without table prefix in JOIN condition right
+ // now.
+ // We need to pass Metadata here to know which table the column belongs
+ // to.
+ throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn
+ .getChild(0)));
}
break;
case HiveParser.Identifier:
- // it may be a field name, return the identifier and let the caller decide whether it is or not
- if ( fields != null ) {
- fields.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
+ // it may be a field name, return the identifier and let the caller decide
+ // whether it is or not
+ if (fields != null) {
+ fields
+ .add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
}
- unparseTranslator.addIdentifierTranslation((ASTNode) condn);
+ unparseTranslator.addIdentifierTranslation(condn);
break;
case HiveParser.Number:
case HiveParser.StringLiteral:
@@ -890,22 +907,24 @@
case HiveParser.TOK_FUNCTION:
// check all the arguments
- for (int i = 1; i < condn.getChildCount(); i++)
+ for (int i = 1; i < condn.getChildCount(); i++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
leftAliases, rightAliases, null);
+ }
break;
default:
// This is an operator - so check whether it is unary or binary operator
- if (condn.getChildCount() == 1)
+ if (condn.getChildCount() == 1) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
- else if (condn.getChildCount() == 2) {
+ } else if (condn.getChildCount() == 2) {
ArrayList<String> fields1 = null;
- // if it is a dot operator, remember the field name of the rhs of the left semijoin
- if (joinTree.getNoSemiJoin() == false &&
- condn.getToken().getType() == HiveParser.DOT) {
+ // if it is a dot operator, remember the field name of the rhs of the
+ // left semijoin
+ if (joinTree.getNoSemiJoin() == false
+ && condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList<String>();
int rhssize = rightAliases.size();
@@ -913,13 +932,13 @@
leftAliases, rightAliases, null);
String rhsAlias = null;
- if ( rightAliases.size() > rhssize ) { // the new table is rhs table
- rhsAlias = rightAliases.get(rightAliases.size()-1);
+ if (rightAliases.size() > rhssize) { // the new table is rhs table
+ rhsAlias = rightAliases.get(rightAliases.size() - 1);
}
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1);
- if ( rhsAlias != null && fields1.size() > 0 ) {
+ if (rhsAlias != null && fields1.size() > 0) {
joinTree.addRHSSemijoinColumns(rhsAlias, condn);
}
} else {
@@ -928,9 +947,10 @@
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1);
}
- } else
+ } else {
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
+ }
break;
}
}
@@ -938,86 +958,107 @@
private void populateAliases(Vector<String> leftAliases,
Vector<String> rightAliases, ASTNode condn, QBJoinTree joinTree,
Vector<String> leftSrc) throws SemanticException {
- if ((leftAliases.size() != 0) && (rightAliases.size() != 0))
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(condn));
+ if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
+ .getMsg(condn));
+ }
if (rightAliases.size() != 0) {
assert rightAliases.size() == 1;
joinTree.getExpressions().get(1).add(condn);
} else if (leftAliases.size() != 0) {
joinTree.getExpressions().get(0).add(condn);
- for (String s : leftAliases)
- if (!leftSrc.contains(s))
+ for (String s : leftAliases) {
+ if (!leftSrc.contains(s)) {
leftSrc.add(s);
- } else
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2.getMsg(condn));
+ }
+ }
+ } else {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2
+ .getMsg(condn));
+ }
}
/**
- * Parse the join condition.
- * If the condition is a join condition, throw an error if it is not an equality. Otherwise, break it into left and
- * right expressions and store in the join tree.
- * If the condition is a join filter, add it to the filter list of join tree. The join condition can contains conditions
- * on both the left and tree trees and filters on either. Currently, we only support equi-joins, so we throw an error
- * if the condition involves both subtrees and is not a equality. Also, we only support AND i.e ORs are not supported
- * currently as their semantics are not very clear, may lead to data explosion and there is no usecase.
- * @param joinTree jointree to be populated
- * @param joinCond join condition
- * @param leftSrc left sources
+ * Parse the join condition. If the condition is a join condition, throw an
+ * error if it is not an equality. Otherwise, break it into left and right
+ * expressions and store in the join tree. If the condition is a join filter,
+ * add it to the filter list of join tree. The join condition can contain
+ * conditions on both the left and right trees and filters on either.
+ * Currently, we only support equi-joins, so we throw an error if the
+ * condition involves both subtrees and is not an equality. Also, we only
+ * support AND, i.e. ORs are not supported currently as their semantics are not
+ * very clear, may lead to data explosion and there is no use case.
+ *
+ * @param joinTree
+ * jointree to be populated
+ * @param joinCond
+ * join condition
+ * @param leftSrc
+ * left sources
* @throws SemanticException
*/
- private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, Vector<String> leftSrc)
- throws SemanticException {
- if (joinCond == null)
+ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
+ Vector<String> leftSrc) throws SemanticException {
+ if (joinCond == null) {
return;
+ }
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(joinCond));
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3
+ .getMsg(joinCond));
case HiveParser.KW_AND:
- parseJoinCondition(joinTree, (ASTNode) joinCond
- .getChild(0), leftSrc);
- parseJoinCondition(joinTree, (ASTNode) joinCond
- .getChild(1), leftSrc);
+ parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc);
+ parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc);
break;
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
Vector<String> leftCondAl1 = new Vector<String>();
Vector<String> leftCondAl2 = new Vector<String>();
- parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null);
+ parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
+ null);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
Vector<String> rightCondAl1 = new Vector<String>();
Vector<String> rightCondAl2 = new Vector<String>();
- parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null);
+ parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
+ rightCondAl2, null);
// is it a filter or a join condition
- if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) ||
- ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0)))
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond));
+ if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
+ || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
+ .getMsg(joinCond));
+ }
if (leftCondAl1.size() != 0) {
- if ((rightCondAl1.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0)))
+ if ((rightCondAl1.size() != 0)
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
joinTree.getFilters().get(0).add(joinCond);
- else if (rightCondAl2.size() != 0) {
- populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc);
- populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc);
- }
- }
- else if (leftCondAl2.size() != 0) {
- if ((rightCondAl2.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0)))
+ } else if (rightCondAl2.size() != 0) {
+ populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
+ leftSrc);
+ populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
+ leftSrc);
+ }
+ } else if (leftCondAl2.size() != 0) {
+ if ((rightCondAl2.size() != 0)
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
joinTree.getFilters().get(1).add(joinCond);
- else if (rightCondAl1.size() != 0) {
- populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc);
- populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc);
+ } else if (rightCondAl1.size() != 0) {
+ populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
+ leftSrc);
+ populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
+ leftSrc);
}
- }
- else if (rightCondAl1.size() != 0)
+ } else if (rightCondAl1.size() != 0) {
joinTree.getFilters().get(0).add(joinCond);
- else
+ } else {
joinTree.getFilters().get(1).add(joinCond);
+ }
break;
@@ -1026,17 +1067,22 @@
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
- ArrayList<Vector<String>> leftAlias = new ArrayList<Vector<String>>(joinCond.getChildCount() - childrenBegin);
- ArrayList<Vector<String>> rightAlias = new ArrayList<Vector<String>>(joinCond.getChildCount() - childrenBegin);
+ ArrayList<Vector<String>> leftAlias = new ArrayList<Vector<String>>(
+ joinCond.getChildCount() - childrenBegin);
+ ArrayList<Vector<String>> rightAlias = new ArrayList<Vector<String>>(
+ joinCond.getChildCount() - childrenBegin);
for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
- Vector<String> left = new Vector<String>();
+ Vector<String> left = new Vector<String>();
Vector<String> right = new Vector<String>();
leftAlias.add(left);
rightAlias.add(right);
}
- for (int ci=childrenBegin; ci<joinCond.getChildCount(); ci++)
- parseJoinCondPopulateAlias(joinTree, (ASTNode)joinCond.getChild(ci), leftAlias.get(ci-childrenBegin), rightAlias.get(ci-childrenBegin), null);
+ for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
+ leftAlias.get(ci - childrenBegin), rightAlias.get(ci
+ - childrenBegin), null);
+ }
boolean leftAliasNull = true;
for (Vector<String> left : leftAlias) {
@@ -1054,69 +1100,80 @@
}
}
- if (!leftAliasNull && !rightAliasNull)
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond));
+ if (!leftAliasNull && !rightAliasNull) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
+ .getMsg(joinCond));
+ }
- if (!leftAliasNull)
+ if (!leftAliasNull) {
joinTree.getFilters().get(0).add(joinCond);
- else
+ } else {
joinTree.getFilters().get(1).add(joinCond);
+ }
break;
}
}
@SuppressWarnings("nls")
- public <T extends Serializable> Operator<T> putOpInsertMap(Operator<T> op, RowResolver rr)
- {
+ public <T extends Serializable> Operator<T> putOpInsertMap(Operator<T> op,
+ RowResolver rr) {
OpParseContext ctx = new OpParseContext(rr);
opParseCtx.put(op, ctx);
return op;
}
@SuppressWarnings("nls")
- private Operator genFilterPlan(String dest, QB qb,
- Operator input) throws SemanticException {
+ private Operator genFilterPlan(String dest, QB qb, Operator input)
+ throws SemanticException {
ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
- return genFilterPlan(qb, (ASTNode)whereExpr.getChild(0), input);
+ return genFilterPlan(qb, (ASTNode) whereExpr.getChild(0), input);
}
/**
* create a filter plan. The condition and the inputs are specified.
- * @param qb current query block
- * @param condn The condition to be resolved
- * @param input the input operator
+ *
+ * @param qb
+ * current query block
+ * @param condn
+ * The condition to be resolved
+ * @param input
+ * the input operator
*/
@SuppressWarnings("nls")
- private Operator genFilterPlan(QB qb, ASTNode condn, Operator input) throws SemanticException {
+ private Operator genFilterPlan(QB qb, ASTNode condn, Operator input)
+ throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRR();
- Operator output = putOpInsertMap(
- OperatorFactory.getAndMakeChild(
- new filterDesc(genExprNodeDesc(condn, inputRR), false),
- new RowSchema(inputRR.getColumnInfos()), input), inputRR);
+ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
+ new filterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
+ inputRR.getColumnInfos()), input), inputRR);
- LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " + inputRR.toString());
+ LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ + inputRR.toString());
return output;
}
@SuppressWarnings("nls")
- private Integer genColListRegex(String colRegex, String tabAlias, String alias, ASTNode sel,
- ArrayList<exprNodeDesc> col_list, RowResolver input, Integer pos,
- RowResolver output) throws SemanticException {
+ private Integer genColListRegex(String colRegex, String tabAlias,
+ String alias, ASTNode sel, ArrayList<exprNodeDesc> col_list,
+ RowResolver input, Integer pos, RowResolver output)
+ throws SemanticException {
// The table alias should exist
- if (tabAlias != null && !input.hasTableAlias(tabAlias))
+ if (tabAlias != null && !input.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
+ }
// TODO: Have to put in the support for AS clause
Pattern regex = null;
try {
regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
- throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e.getMessage()));
+ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
+ .getMessage()));
}
StringBuilder replacementText = new StringBuilder();
@@ -1124,9 +1181,9 @@
// This is the tab.* case
// In this case add all the columns to the fieldList
// from the input schema
- for(ColumnInfo colInfo: input.getColumnInfos()) {
+ for (ColumnInfo colInfo : input.getColumnInfos()) {
String name = colInfo.getInternalName();
- String [] tmp = input.reverseLookup(name);
+ String[] tmp = input.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
@@ -1139,14 +1196,13 @@
}
exprNodeColumnDesc expr = new exprNodeColumnDesc(colInfo.getType(), name,
- colInfo.getTabAlias(),
- colInfo.getIsPartitionCol());
+ colInfo.getTabAlias(), colInfo.getIsPartitionCol());
col_list.add(expr);
output.put(tmp[0], tmp[1],
- new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
- colInfo.getTabAlias(), colInfo.getIsPartitionCol()));
+ new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo
+ .getTabAlias(), colInfo.getIsPartitionCol()));
pos = Integer.valueOf(pos.intValue() + 1);
- matched ++;
+ matched++;
if (unparseTranslator.isEnabled()) {
if (replacementText.length() > 0) {
@@ -1171,27 +1227,28 @@
return HiveConf.getColumnInternalName(pos);
}
-
/**
* If the user script command needs any modifications - do it here
*/
private String getFixedCmd(String cmd) {
SessionState ss = SessionState.get();
- if(ss == null)
+ if (ss == null) {
return cmd;
+ }
// for local mode - replace any references to packaged files by name with
// the reference to the original file path
- if(ss.getConf().get("mapred.job.tracker", "local").equals("local")) {
- Set<String> files = ss.list_resource(SessionState.ResourceType.FILE, null);
- if((files != null) && !files.isEmpty()) {
+ if (ss.getConf().get("mapred.job.tracker", "local").equals("local")) {
+ Set<String> files = ss
+ .list_resource(SessionState.ResourceType.FILE, null);
+ if ((files != null) && !files.isEmpty()) {
int end = cmd.indexOf(" ");
String prog = (end == -1) ? cmd : cmd.substring(0, end);
- String args = (end == -1) ? "" : cmd.substring(end, cmd.length());
+ String args = (end == -1) ? "" : cmd.substring(end, cmd.length());
- for(String oneFile: files) {
+ for (String oneFile : files) {
Path p = new Path(oneFile);
- if(p.getName().equals(prog)) {
+ if (p.getName().equals(prog)) {
cmd = oneFile + args;
break;
}
@@ -1202,60 +1259,72 @@
return cmd;
}
- private tableDesc getTableDescFromSerDe(ASTNode child, String cols, String colTypes, boolean defaultCols) throws SemanticException {
+ private tableDesc getTableDescFromSerDe(ASTNode child, String cols,
+ String colTypes, boolean defaultCols) throws SemanticException {
if (child.getType() == HiveParser.TOK_SERDENAME) {
String serdeName = unescapeSQLString(child.getChild(0).getText());
Class<? extends Deserializer> serdeClass = null;
try {
- serdeClass = (Class<? extends Deserializer>)Class.forName(serdeName, true, JavaUtils.getClassLoader());
+ serdeClass = (Class<? extends Deserializer>) Class.forName(serdeName,
+ true, JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
- tableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer.toString(Utilities.tabCode), cols, colTypes, defaultCols, true);
+ tableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
+ .toString(Utilities.tabCode), cols, colTypes, defaultCols, true);
// copy all the properties
if (child.getChildCount() == 2) {
- ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0);
+ ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
- String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText());
- String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText());
- tblDesc.getProperties().setProperty(key,value);
+ String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
+ .getText());
+ String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
+ .getText());
+ tblDesc.getProperties().setProperty(key, value);
}
}
return tblDesc;
- }
- else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
- tableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
+ } else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
+ tableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
+ .toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
int numChildRowFormat = child.getChildCount();
- for (int numC = 0; numC < numChildRowFormat; numC++)
- {
- ASTNode rowChild = (ASTNode)child.getChild(numC);
+ for (int numC = 0; numC < numChildRowFormat; numC++) {
+ ASTNode rowChild = (ASTNode) child.getChild(numC);
switch (rowChild.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMATFIELD:
String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
- tblDesc.getProperties().setProperty(Constants.FIELD_DELIM, fieldDelim);
- tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT, fieldDelim);
-
- if (rowChild.getChildCount()>=2) {
- String fieldEscape = unescapeSQLString(rowChild.getChild(1).getText());
- tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR, fieldEscape);
+ tblDesc.getProperties()
+ .setProperty(Constants.FIELD_DELIM, fieldDelim);
+ tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT,
+ fieldDelim);
+
+ if (rowChild.getChildCount() >= 2) {
+ String fieldEscape = unescapeSQLString(rowChild.getChild(1)
+ .getText());
+ tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR,
+ fieldEscape);
}
break;
case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
- tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM, unescapeSQLString(rowChild.getChild(0).getText()));
+ tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM,
+ unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
- tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM, unescapeSQLString(rowChild.getChild(0).getText()));
+ tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM,
+ unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATLINES:
String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(Constants.LINE_DELIM, lineDelim);
if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
- throw new SemanticException(ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg());
+ throw new SemanticException(
+ ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg());
}
break;
- default: assert false;
+ default:
+ assert false;
}
}
@@ -1266,68 +1335,77 @@
return null;
}
- private void failIfColAliasExists(Set<String> nameSet, String name) throws SemanticException {
- if(nameSet.contains(name))
- throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS.getMsg(name));
+ private void failIfColAliasExists(Set<String> nameSet, String name)
+ throws SemanticException {
+ if (nameSet.contains(name)) {
+ throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
+ .getMsg(name));
+ }
nameSet.add(name);
}
@SuppressWarnings("nls")
- private Operator genScriptPlan(ASTNode trfm, QB qb,
- Operator input) throws SemanticException {
+ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
+ throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
- int inputSerDeNum = 1, inputRecordWriterNum = 2;
- int outputSerDeNum = 4, outputRecordReaderNum = 5;
- int outputColsNum = 6;
+ int inputSerDeNum = 1, inputRecordWriterNum = 2;
+ int outputSerDeNum = 4, outputRecordReaderNum = 5;
+ int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
- int execPos = 3;
+ int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
- ASTNode outCols = (ASTNode)trfm.getChild(outputColsNum);
- if (outCols.getType() == HiveParser.TOK_ALIASLIST)
+ ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
+ if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
- else if (outCols.getType() == HiveParser.TOK_TABCOLLIST)
+ } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
+ }
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
- ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
+ ColumnInfo colInfo = new ColumnInfo(intName,
+ TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
- colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
+ colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
+ false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
- }
- else {
+ } else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
- for (int i=0; i < ccount; ++i) {
- String colAlias = unescapeIdentifier(((ASTNode)collist.getChild(i)).getText());
+ for (int i = 0; i < ccount; ++i) {
+ String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
+ .getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
- ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
+ ColumnInfo colInfo = new ColumnInfo(intName,
+ TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
- }
- else {
- for (int i=0; i < ccount; ++i) {
+ } else {
+ for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
- String colAlias = unescapeIdentifier(((ASTNode)child.getChild(0)).getText());
+ String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
+ .getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
- ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode)child.getChild(1))), null, false);
+ ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
+ .getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
+ .getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
@@ -1347,15 +1425,14 @@
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
- out_rwsch.put(
- qb.getParseInfo().getAlias(),
- outputCols.get(i).getAlias(),
- outputCols.get(i));
+ out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
+ outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
- Vector<ColumnInfo> inputSchema = opParseCtx.get(input).getRR().getColumnInfos();
+ Vector<ColumnInfo> inputSchema = opParseCtx.get(input).getRR()
+ .getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
@@ -1368,80 +1445,102 @@
tableDesc outInfo;
tableDesc inInfo;
- String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
+ String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
- serde = (Class<? extends Deserializer>)Class.forName(defaultSerdeName, true, JavaUtils.getClassLoader());
+ serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName,
+ true, JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
// Input and Output Serdes
- if (trfm.getChild(inputSerDeNum).getChildCount() > 0)
- inInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false);
- else
- inInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), inpColumns.toString(), inpColumnTypes.toString(), false, true);
-
- if (trfm.getChild(outputSerDeNum).getChildCount() > 0)
- outInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false);
- // This is for backward compatibility. If the user did not specify the output column list, we assume that there are 2 columns: key and value.
- // However, if the script outputs: col1, col2, col3 seperated by TAB, the requirement is: key is col and value is (col2 TAB col3)
- else
- outInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), columns.toString(), columnTypes.toString(), defaultOutputCols);
+ if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
+ inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
+ .getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
+ inpColumnTypes.toString(), false);
+ } else {
+ inInfo = PlanUtils.getTableDesc(serde, Integer
+ .toString(Utilities.tabCode), inpColumns.toString(), inpColumnTypes
+ .toString(), false, true);
+ }
+
+ if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
+ outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
+ .getChild(outputSerDeNum))).getChild(0), columns.toString(),
+ columnTypes.toString(), false);
+ // This is for backward compatibility. If the user did not specify the
+ // output column list, we assume that there are 2 columns: key and value.
+ // However, if the script outputs: col1, col2, col3 separated by TAB, the
+ // requirement is: key is col1 and value is (col2 TAB col3)
+ } else {
+ outInfo = PlanUtils.getTableDesc(serde, Integer
+ .toString(Utilities.tabCode), columns.toString(), columnTypes
+ .toString(), defaultOutputCols);
+ }
// Output record readers
- Class <? extends RecordReader> outRecordReader = getRecordReader((ASTNode)trfm.getChild(outputRecordReaderNum));
- Class <? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode)trfm.getChild(inputRecordWriterNum));
+ Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
+ .getChild(outputRecordReaderNum));
+ Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
+ .getChild(inputRecordWriterNum));
- Operator output = putOpInsertMap(OperatorFactory
- .getAndMakeChild(
- new scriptDesc(getFixedCmd(stripQuotes(trfm.getChild(execPos).getText())),
- inInfo, inRecordWriter, outInfo, outRecordReader),
- new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
+ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
+ new scriptDesc(
+ getFixedCmd(stripQuotes(trfm.getChild(execPos).getText())), inInfo,
+ inRecordWriter, outInfo, outRecordReader), new RowSchema(out_rwsch
+ .getColumnInfos()), input), out_rwsch);
return output;
}
- private Class<? extends RecordReader> getRecordReader(ASTNode node) throws SemanticException {
+ private Class<? extends RecordReader> getRecordReader(ASTNode node)
+ throws SemanticException {
String name;
- if (node.getChildCount() == 0)
+ if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
- else
+ } else {
name = unescapeSQLString(node.getChild(0).getText());
+ }
try {
- return (Class<? extends RecordReader>)Class.forName(name, true, JavaUtils.getClassLoader());
+ return (Class<? extends RecordReader>) Class.forName(name, true,
+ JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
- private Class<? extends RecordWriter> getRecordWriter(ASTNode node) throws SemanticException {
+ private Class<? extends RecordWriter> getRecordWriter(ASTNode node)
+ throws SemanticException {
String name;
- if (node.getChildCount() == 0)
+ if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
- else
+ } else {
name = unescapeSQLString(node.getChild(0).getText());
+ }
try {
- return (Class<? extends RecordWriter>)Class.forName(name, true, JavaUtils.getClassLoader());
+ return (Class<? extends RecordWriter>) Class.forName(name, true,
+ JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
/**
- * This function is a wrapper of parseInfo.getGroupByForClause which automatically
- * translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY a,b,c.
+ * This function is a wrapper of parseInfo.getGroupByForClause which
+ * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
+ * a,b,c.
*/
static List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
- List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null
- ? 0 : selectExprs.getChildCount());
+ List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
+ : selectExprs.getChildCount());
if (selectExprs != null) {
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
// table.column AS alias
@@ -1452,8 +1551,8 @@
return result;
} else {
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
- List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null
- ? 0 : grpByExprs.getChildCount());
+ List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
+ : grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
@@ -1464,14 +1563,15 @@
}
}
- private static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR) {
+ private static String[] getColAlias(ASTNode selExpr, String defaultName,
+ RowResolver inputRR) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
if (selExpr.getChildCount() == 2) {
// return zz for "xx + yy AS zz"
- colAlias = unescapeIdentifier(selExpr.getChild(1).getText());
+ colAlias = unescapeIdentifier(selExpr.getChild(1).getText());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
@@ -1501,7 +1601,7 @@
}
}
- if(colAlias == null) {
+ if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
colAlias = defaultName;
}
@@ -1512,11 +1612,11 @@
}
/**
- * Returns whether the pattern is a regex expression (instead of a normal string).
- * Normal string is a string with all alphabets/digits and "_".
+ * Returns whether the pattern is a regular expression (instead of a normal
+ * string). A normal string contains only letters, digits and "_".
*/
private static boolean isRegex(String pattern) {
- for(int i=0; i<pattern.length(); i++) {
+ for (int i = 0; i < pattern.length(); i++) {
if (!Character.isLetterOrDigit(pattern.charAt(i))
&& pattern.charAt(i) != '_') {
return true;
@@ -1525,17 +1625,18 @@
return false;
}
- private Operator<?> genSelectPlan(String dest, QB qb,
- Operator<?> input) throws SemanticException {
+ private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input)
+ throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
Operator<?> op = genSelectPlan(selExprList, qb, input);
LOG.debug("Created Select Plan for clause: " + dest);
return op;
}
+
@SuppressWarnings("nls")
private Operator<?> genSelectPlan(ASTNode selExprList, QB qb,
- Operator<?> input) throws SemanticException {
+ Operator<?> input) throws SemanticException {
LOG.debug("tree: " + selExprList.toStringTree());
ArrayList<exprNodeDesc> col_list = new ArrayList<exprNodeDesc>();
@@ -1552,8 +1653,7 @@
posn++;
}
- boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType()
- == HiveParser.TOK_TRANSFORM);
+ boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
if (isInTransform) {
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
@@ -1569,17 +1669,16 @@
GenericUDTF genericUDTF = null;
if (udtfExpr.getType() == HiveParser.TOK_FUNCTION) {
- String funcName =
- TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(
- udtfExpr, true);
+ String funcName = TypeCheckProcFactory.DefaultExprProcessor
+ .getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF && !fi.isNative()) {
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) udtfExpr.getChild(0));
+ unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr
+ .getChild(0));
}
}
@@ -1595,7 +1694,7 @@
}
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION
- for (int i=1; i<selExpr.getChildCount(); i++) {
+ for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
@@ -1603,14 +1702,14 @@
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
- assert(selExprChild.getChildCount() == 1);
- udtfTableAlias =
- unescapeIdentifier(selExprChild.getChild(0).getText());
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) selExprChild.getChild(0));
+ assert (selExprChild.getChildCount() == 1);
+ udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
+ .getText());
+ unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
+ .getChild(0));
break;
default:
- assert(false);
+ assert (false);
}
}
LOG.debug("UDTF table alias is " + udtfTableAlias);
@@ -1622,7 +1721,7 @@
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
- exprList = (ASTNode) udtfExpr;
+ exprList = udtfExpr;
} else {
exprList = selExprList;
}
@@ -1638,9 +1737,9 @@
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
-
+
// EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's
- // This check is not needed and invalid when there is a transform b/c the
+ // This check is not needed and invalid when there is a transform b/c the
// AST's are slightly different.
if (!isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(ErrorMsg.INVALID_AS.getMsg());
@@ -1660,20 +1759,19 @@
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
- unparseTranslator.addIdentifierTranslation(
- (ASTNode) child.getChild(1));
+ unparseTranslator.addIdentifierTranslation((ASTNode) child
+ .getChild(1));
}
// Get rid of TOK_SELEXPR
- expr = (ASTNode)child.getChild(0);
+ expr = (ASTNode) child.getChild(0);
}
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
- pos = genColListRegex(".*",
- expr.getChildCount() == 0 ? null : unescapeIdentifier(expr.getChild(0).getText().toLowerCase()),
+ pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
+ : unescapeIdentifier(expr.getChild(0).getText().toLowerCase()),
alias, expr, col_list, inputRR, pos, out_rwsch);
selectStar = true;
- } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
- && !hasAsClause
+ } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(0).getText()))) {
// In case the expression is a regex COL.
@@ -1683,27 +1781,27 @@
null, alias, expr, col_list, inputRR, pos, out_rwsch);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
- && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()))
- && !hasAsClause
+ && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
+ .getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()))) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
- unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
- alias, expr, col_list, inputRR, pos, out_rwsch);
+ unescapeIdentifier(expr.getChild(0).getChild(0).getText()
+ .toLowerCase()), alias, expr, col_list, inputRR, pos, out_rwsch);
} else {
// Case when this is an expression
exprNodeDesc exp = genExprNodeDesc(expr, inputRR);
col_list.add(exp);
- if (!StringUtils.isEmpty(alias) &&
- (out_rwsch.get(null, colAlias) != null)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr.getChild(1)));
- }
- out_rwsch.put(tabAlias, colAlias,
- new ColumnInfo(getColumnInternalName(pos),
- exp.getTypeInfo(), tabAlias, false));
+ if (!StringUtils.isEmpty(alias)
+ && (out_rwsch.get(null, colAlias) != null)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr
+ .getChild(1)));
+ }
+ out_rwsch.put(tabAlias, colAlias, new ColumnInfo(
+ getColumnInternalName(pos), exp.getTypeInfo(), tabAlias, false));
pos = Integer.valueOf(pos.intValue() + 1);
}
@@ -1712,10 +1810,11 @@
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, exprNodeDesc> colExprMap = new HashMap<String, exprNodeDesc>();
- for (int i=0; i<col_list.size(); i++) {
+ for (int i = 0; i < col_list.size(); i++) {
// Replace NULL with CAST(NULL AS STRING)
if (col_list.get(i) instanceof exprNodeNullDesc) {
- col_list.set(i, new exprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null));
+ col_list.set(i, new exprNodeConstantDesc(
+ TypeInfoFactory.stringTypeInfo, null));
}
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
@@ -1723,8 +1822,8 @@
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
- new selectDesc(col_list, columnNames, selectStar), new RowSchema(out_rwsch.getColumnInfos()),
- input), out_rwsch);
+ new selectDesc(col_list, columnNames, selectStar), new RowSchema(
+ out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
@@ -1733,7 +1832,7 @@
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
- output);
+ output);
}
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
return output;
@@ -1753,7 +1852,7 @@
*/
static ArrayList<TypeInfo> getTypeInfo(ArrayList<exprNodeDesc> exprs) {
ArrayList<TypeInfo> result = new ArrayList<TypeInfo>();
- for(exprNodeDesc expr: exprs) {
+ for (exprNodeDesc expr : exprs) {
result.add(expr.getTypeInfo());
}
return result;
@@ -1764,7 +1863,7 @@
*/
static ObjectInspector[] getStandardObjectInspector(ArrayList<TypeInfo> exprs) {
ObjectInspector[] result = new ObjectInspector[exprs.size()];
- for (int i=0; i<exprs.size(); i++) {
+ for (int i = 0; i < exprs.size(); i++) {
result[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(exprs.get(i));
}
@@ -1772,31 +1871,36 @@
}
/**
- * Returns the GenericUDAFEvaluator for the aggregation.
- * This is called once for each GroupBy aggregation.
+ * Returns the GenericUDAFEvaluator for the aggregation. This is called once
+ * for each GroupBy aggregation.
*/
static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName,
- ArrayList<exprNodeDesc> aggParameters,
- ASTNode aggTree) throws SemanticException {
+ ArrayList<exprNodeDesc> aggParameters, ASTNode aggTree)
+ throws SemanticException {
ArrayList<TypeInfo> originalParameterTypeInfos = getTypeInfo(aggParameters);
GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(
aggName, originalParameterTypeInfos);
if (null == result) {
- String reason = "Looking for UDAF Evaluator\"" + aggName + "\" with parameters "
- + originalParameterTypeInfos;
- throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.
- getMsg((ASTNode)aggTree.getChild(0), reason));
+ String reason = "Looking for UDAF Evaluator\"" + aggName
+ + "\" with parameters " + originalParameterTypeInfos;
+ throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(
+ (ASTNode) aggTree.getChild(0), reason));
}
return result;
}
/**
* Returns the GenericUDAFInfo struct for the aggregation.
- * @param aggName The name of the UDAF.
- * @param aggParameters The exprNodeDesc of the original parameters
- * @param aggTree The ASTNode node of the UDAF in the query.
+ *
+ * @param aggName
+ * The name of the UDAF.
+ * @param aggParameters
+ * The exprNodeDesc of the original parameters
+ * @param aggTree
+ * The ASTNode node of the UDAF in the query.
* @return GenericUDAFInfo
- * @throws SemanticException when the UDAF is not found or has problems.
+ * @throws SemanticException
+ * when the UDAF is not found or has problems.
*/
static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator,
GenericUDAFEvaluator.Mode emode, ArrayList<exprNodeDesc> aggParameters)
@@ -1810,8 +1914,7 @@
// set r.returnType
ObjectInspector returnOI = null;
try {
- ObjectInspector[] aggObjectInspectors =
- getStandardObjectInspector(getTypeInfo(aggParameters));
+ ObjectInspector[] aggObjectInspectors = getStandardObjectInspector(getTypeInfo(aggParameters));
returnOI = r.genericUDAFEvaluator.init(emode, aggObjectInspectors);
r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
} catch (HiveException e) {
@@ -1824,35 +1927,49 @@
return r;
}
- private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(groupByDesc.Mode mode, boolean isDistinct) {
+ private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(
+ groupByDesc.Mode mode, boolean isDistinct) {
switch (mode) {
- case COMPLETE: return GenericUDAFEvaluator.Mode.COMPLETE;
- case PARTIAL1: return GenericUDAFEvaluator.Mode.PARTIAL1;
- case PARTIAL2: return GenericUDAFEvaluator.Mode.PARTIAL2;
- case PARTIALS: return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1 : GenericUDAFEvaluator.Mode.PARTIAL2;
- case FINAL: return GenericUDAFEvaluator.Mode.FINAL;
- case HASH: return GenericUDAFEvaluator.Mode.PARTIAL1;
- case MERGEPARTIAL: return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE : GenericUDAFEvaluator.Mode.FINAL;
- default:
- throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
+ case COMPLETE:
+ return GenericUDAFEvaluator.Mode.COMPLETE;
+ case PARTIAL1:
+ return GenericUDAFEvaluator.Mode.PARTIAL1;
+ case PARTIAL2:
+ return GenericUDAFEvaluator.Mode.PARTIAL2;
+ case PARTIALS:
+ return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
+ : GenericUDAFEvaluator.Mode.PARTIAL2;
+ case FINAL:
+ return GenericUDAFEvaluator.Mode.FINAL;
+ case HASH:
+ return GenericUDAFEvaluator.Mode.PARTIAL1;
+ case MERGEPARTIAL:
+ return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
+ : GenericUDAFEvaluator.Mode.FINAL;
+ default:
+ throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
}
}
+
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
- *
- * @param mode The mode of the aggregation (PARTIAL1 or COMPLETE)
- * @param genericUDAFEvaluators If not null, this function will store the mapping
- * from Aggregation StringTree to the genericUDAFEvaluator in this parameter,
- * so it can be used in the next-stage GroupBy aggregations.
+ *
+ * @param mode
+ * The mode of the aggregation (PARTIAL1 or COMPLETE)
+ * @param genericUDAFEvaluators
+ * If not null, this function will store the mapping from Aggregation
+ * StringTree to the genericUDAFEvaluator in this parameter, so it
+ * can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
- private Operator genGroupByPlanGroupByOperator(
- QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo,
- groupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
- throws SemanticException {
- RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRR();
+ private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
+ String dest, Operator reduceSinkOperatorInfo, groupByDesc.Mode mode,
+ Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
+ throws SemanticException {
+ RowResolver groupByInputRowResolver = opParseCtx
+ .get(reduceSinkOperatorInfo).getRR();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<exprNodeDesc> groupByKeys = new ArrayList<exprNodeDesc>();
@@ -1863,18 +1980,18 @@
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
String text = grpbyExpr.toStringTree();
- ColumnInfo exprInfo = groupByInputRowResolver.get("",text);
+ ColumnInfo exprInfo = groupByInputRowResolver.get("", text);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
- groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(),
- exprInfo.getInternalName(), "", false));
+ groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo
+ .getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
- groupByOutputRowResolver.put("",grpbyExpr.toStringTree(),
- new ColumnInfo(field, exprInfo.getType(), null, false));
+ groupByOutputRowResolver.put("", grpbyExpr.toStringTree(),
+ new ColumnInfo(field, exprInfo.getType(), null, false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
@@ -1892,44 +2009,45 @@
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
String text = value.getChild(i).toStringTree();
- ASTNode paraExpr = (ASTNode)value.getChild(i);
- ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text);
+ ASTNode paraExpr = (ASTNode) value.getChild(i);
+ ColumnInfo paraExprInfo = groupByInputRowResolver.get("", text);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
- assert(paraExpression != null);
+ assert (paraExpression != null);
aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(),
- paraExprInfo.getInternalName(),
- paraExprInfo.getTabAlias(),
- paraExprInfo.getIsPartitionCol()));
+ paraExprInfo.getInternalName(), paraExprInfo.getTabAlias(),
+ paraExprInfo.getIsPartitionCol()));
}
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
- GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value);
- assert(genericUDAFEvaluator != null);
- GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
- aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters,
- isDistinct, amode));
- String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1);
+ GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
+ aggName, aggParameters, value);
+ assert (genericUDAFEvaluator != null);
+ GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
+ aggParameters);
+ aggregations.add(new aggregationDesc(aggName.toLowerCase(),
+ udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
+ amode));
+ String field = getColumnInternalName(groupByKeys.size()
+ + aggregations.size() - 1);
outputColumnNames.add(field);
- groupByOutputRowResolver.put("",value.toStringTree(),
- new ColumnInfo(field,
- udaf.returnType, "", false));
- // Save the evaluator so that it can be used by the next-stage GroupByOperators
+ groupByOutputRowResolver.put("", value.toStringTree(), new ColumnInfo(
+ field, udaf.returnType, "", false));
+ // Save the evaluator so that it can be used by the next-stage
+ // GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
- Operator op =
- putOpInsertMap(OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false),
- new RowSchema(groupByOutputRowResolver.getColumnInfos()),
- reduceSinkOperatorInfo),
- groupByOutputRowResolver
- );
+ Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
+ new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
+ false), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
+ reduceSinkOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
@@ -1937,20 +2055,24 @@
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
- *
- * @param mode The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
- * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the
- * genericUDAFEvaluator.
- * @param distPartAggr partial aggregation for distincts
+ *
+ * @param mode
+ * The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
+ * @param genericUDAFEvaluators
+ * The mapping from Aggregation StringTree to the
+ * genericUDAFEvaluator.
+ * @param distPartAggr
+ * partial aggregation for distincts
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
- private Operator genGroupByPlanGroupByOperator1(
[... 5338 lines stripped ...]