You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/23 22:00:11 UTC
svn commit: r1627140 [2/3] - in /hive/trunk: ./
common/src/java/org/apache/hadoop/hive/conf/
contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/
contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/
contrib/src/java/org/apache/hadoop/h...
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1627140&r1=1627139&r2=1627140&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Sep 23 20:00:10 2014
@@ -22,8 +22,10 @@ import static org.apache.hadoop.hive.con
import java.io.IOException;
import java.io.Serializable;
+import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -34,12 +36,18 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
+import net.hydromatic.optiq.SchemaPlus;
+import net.hydromatic.optiq.tools.Frameworks;
+
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
+import org.antlr.runtime.tree.TreeVisitor;
+import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.commons.lang.StringUtils;
@@ -107,6 +115,29 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule;
+import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
@@ -192,12 +223,72 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.InputFormat;
+import org.eigenbase.rel.AggregateCall;
+import org.eigenbase.rel.Aggregation;
+import org.eigenbase.rel.FilterRelBase;
+import org.eigenbase.rel.InvalidRelException;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.JoinRelType;
+import org.eigenbase.rel.RelCollation;
+import org.eigenbase.rel.RelCollationImpl;
+import org.eigenbase.rel.RelFactories;
+import org.eigenbase.rel.RelFieldCollation;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.rel.metadata.CachingRelMetadataProvider;
+import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
+import org.eigenbase.rel.metadata.RelMetadataProvider;
+import org.eigenbase.rel.rules.ConvertMultiJoinRule;
+import org.eigenbase.rel.rules.LoptOptimizeJoinRule;
+import org.eigenbase.rel.rules.MergeFilterRule;
+import org.eigenbase.rel.rules.PushFilterPastProjectRule;
+import org.eigenbase.rel.rules.PushFilterPastSetOpRule;
+import org.eigenbase.rel.rules.SemiJoinRel;
+import org.eigenbase.rel.rules.TransitivePredicatesOnJoinRule;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptPlanner;
+import org.eigenbase.relopt.RelOptQuery;
+import org.eigenbase.relopt.RelOptRule;
+import org.eigenbase.relopt.RelOptSchema;
+import org.eigenbase.relopt.RelOptUtil;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.relopt.hep.HepMatchOrder;
+import org.eigenbase.relopt.hep.HepPlanner;
+import org.eigenbase.relopt.hep.HepProgram;
+import org.eigenbase.relopt.hep.HepProgramBuilder;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexWindowBound;
+import org.eigenbase.rex.RexFieldCollation;
+import org.eigenbase.sql.SqlAggFunction;
+import org.eigenbase.sql.SqlWindow;
+import org.eigenbase.sql.parser.SqlParserPos;
+import org.eigenbase.sql.type.SqlTypeName;
+import org.eigenbase.sql2rel.RelFieldTrimmer;
+import org.eigenbase.sql.SqlCall;
+import org.eigenbase.sql.SqlExplainLevel;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlNode;
+import org.eigenbase.sql.SqlLiteral;
+import org.eigenbase.util.CompositeList;
+import org.eigenbase.util.ImmutableIntList;
+import org.eigenbase.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
/**
* Implementation of the semantic analyzer. It generates the query plan.
@@ -265,6 +356,9 @@ public class SemanticAnalyzer extends Ba
//flag for partial scan during analyze ... compute statistics
protected boolean partialscan;
+ private volatile boolean runCBO = true;
+ private volatile boolean disableJoinMerge = false;
+
/*
* Capture the CTE definitions in a Query.
*/
@@ -279,6 +373,11 @@ public class SemanticAnalyzer extends Ba
int nextNum;
}
+ protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException {
+ this(conf);
+ this.runCBO = runCBO;
+ }
+
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
@@ -315,8 +414,11 @@ public class SemanticAnalyzer extends Ba
}
@Override
- protected void reset() {
- super.reset();
+ protected void reset(boolean clearPartsCache) {
+ super.reset(true);
+ if(clearPartsCache) {
+ prunedPartitions.clear();
+ }
loadTableWork.clear();
loadFileWork.clear();
topOps.clear();
@@ -330,7 +432,7 @@ public class SemanticAnalyzer extends Ba
smbMapJoinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
- prunedPartitions.clear();
+ disableJoinMerge = false;
aliasToCTEs.clear();
topToTable.clear();
opToPartPruner.clear();
@@ -344,8 +446,6 @@ public class SemanticAnalyzer extends Ba
viewsExpanded = null;
viewSelect = null;
ctesExpanded = null;
- noscan = false;
- partialscan = false;
globalLimitCtx.disableOpt();
viewAliasToInput.clear();
reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
@@ -354,7 +454,6 @@ public class SemanticAnalyzer extends Ba
unparseTranslator.clear();
queryProperties.clear();
outputs.clear();
- globalLimitCtx.reset();
}
public void initParseCtx(ParseContext pctx) {
@@ -957,9 +1056,7 @@ public class SemanticAnalyzer extends Ba
private boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
- || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
+ || isOuterJoinToken(node)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
@@ -968,6 +1065,12 @@ public class SemanticAnalyzer extends Ba
return false;
}
+ private boolean isOuterJoinToken(ASTNode node) {
+ return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
+ }
+
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
@@ -985,6 +1088,7 @@ public class SemanticAnalyzer extends Ba
"Join with multiple children"));
}
+ queryProperties.incrementJoinCount(isOuterJoinToken(join));
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
@@ -1091,10 +1195,15 @@ public class SemanticAnalyzer extends Ba
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
+ int posn = 0;
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
qbp.setHints((ASTNode) ast.getChild(0));
+ posn++;
}
+ if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
+ queryProperties.setUsesScript(true);
+
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
@@ -1105,6 +1214,8 @@ public class SemanticAnalyzer extends Ba
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
+ if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
+ queryProperties.setFilterWithSubQuery(true);
break;
case HiveParser.TOK_INSERT_INTO:
@@ -1127,6 +1238,9 @@ public class SemanticAnalyzer extends Ba
}
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
+
+ if (qbp.getClauseNamesForDest().size() > 1)
+ queryProperties.setMultiDestQuery(true);
break;
case HiveParser.TOK_FROM:
@@ -1150,9 +1264,9 @@ public class SemanticAnalyzer extends Ba
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+ queryProperties.setHasLateralViews(true);
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
- queryProperties.setHasJoin(true);
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
@@ -1365,6 +1479,10 @@ public class SemanticAnalyzer extends Ba
}
}
+ public Table getTable(TableScanOperator ts) {
+ return topToTable.get(ts);
+ }
+
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, null);
}
@@ -2310,7 +2428,7 @@ public class SemanticAnalyzer extends Ba
output = putOpInsertMap(output, inputRR);
return output;
}
-
+
private Operator genPlanForSubQueryPredicate(
QB qbSQ,
ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
@@ -9444,7 +9562,9 @@ public class SemanticAnalyzer extends Ba
aliasToOpInfo );
}
}
- mergeJoinTree(qb);
+
+ if (!disableJoinMerge)
+ mergeJoinTree(qb);
}
// if any filters are present in the join tree, push them on top of the
@@ -9652,9 +9772,9 @@ public class SemanticAnalyzer extends Ba
}
@Override
- public void init() {
+ public void init(boolean clearPartsCache) {
// clear most members
- reset();
+ reset(clearPartsCache);
// init
QB qb = new QB(null, null, false);
@@ -9709,11 +9829,82 @@ public class SemanticAnalyzer extends Ba
getMetaData(qb);
LOG.info("Completed getting MetaData in Semantic Analysis");
+
+ if (runCBO) {
+ boolean tokenTypeIsQuery = ast.getToken().getType() == HiveParser.TOK_QUERY
+ || ast.getToken().getType() == HiveParser.TOK_EXPLAIN;
+ if (!tokenTypeIsQuery || createVwDesc != null
+ || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)
+ || !canHandleQuery(qb, true) || !HiveOptiqUtil.validateASTForCBO(ast)) {
+ runCBO = false;
+ }
+
+ if (runCBO) {
+ disableJoinMerge = true;
+ }
+ }
+
// Save the result schema derived from the sink operator produced
// by genPlan. This has the correct column names, which clients
// such as JDBC would prefer instead of the c0, c1 we'll end
// up with later.
- Operator sinkOp = genPlan(qb);
+ Operator sinkOp = null;
+
+ if (runCBO) {
+ OptiqBasedPlanner optiqPlanner = new OptiqBasedPlanner();
+ boolean reAnalyzeAST = false;
+
+ try {
+ // 1. Gen Optimized AST
+ ASTNode newAST = optiqPlanner.getOptimizedAST(prunedPartitions);
+
+ // 2. Regen OP plan from optimized AST
+ init(false);
+ ctx_1 = initPhase1Ctx();
+ if (!doPhase1(newAST, qb, ctx_1)) {
+ throw new RuntimeException(
+ "Couldn't do phase1 on CBO optimized query plan");
+ }
+ // Unfortunately, making prunedPartitions immutable is not possible here:
+ // with SemiJoins, not all tables are costed in CBO,
+ // so their PartitionList is not evaluated until the run phase.
+ //prunedPartitions = ImmutableMap.copyOf(prunedPartitions);
+ getMetaData(qb);
+
+ disableJoinMerge = true;
+ sinkOp = genPlan(qb);
+ LOG.info("CBO Succeeded; optimized logical plan.");
+ LOG.debug(newAST.dump());
+
+ /*
+ * Use non CBO Result Set Schema so as to preserve user specified names.
+ * Hive seems to have bugs with OB/LIMIT in sub queries.
+ * Disabled step "3. Reset result set schema":
+ * resultSchema = convertRowSchemaToResultSetSchema(
+ * opParseCtx.get(sinkOp).getRowResolver(), true);
+ */
+ } catch (Exception e) {
+ LOG.error("CBO failed, skipping CBO. ", e);
+ if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) ||
+ (optiqPlanner.noColsMissingStats.get() > 0) ||
+ e instanceof OptiqSemanticException) {
+ reAnalyzeAST = true;
+ } else {
+ throw e instanceof SemanticException ? (SemanticException) e : new SemanticException(e);
+ }
+ } finally {
+ runCBO = false;
+ disableJoinMerge = false;
+ if (reAnalyzeAST) {
+ init(true);
+ prunedPartitions.clear();
+ analyzeInternal(ast);
+ return;
+ }
+ }
+ } else {
+ sinkOp = genPlan(qb);
+ }
if (createVwDesc != null)
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
@@ -11953,4 +12144,1931 @@ public class SemanticAnalyzer extends Ba
return false;
}
+ /**** Temporary Place Holder For Optiq plan Gen, Optimizer ****/
+
+ /*
+ * Entry point to Optimizations using Optiq.
+ */
+ private boolean canHandleQuery(QB qbToChk, boolean topLevelQB) {
+ boolean runOptiqPlanner = false;
+ // Assumption:
+ // 1. If top level QB is query then everything below it must also be Query
+ // 2. Nested Subquery will return false for qbToChk.getIsQuery()
+ if ((!topLevelQB || qbToChk.getIsQuery())
+ && (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"))
+ && (!topLevelQB || (queryProperties.getJoinCount() > 1) || conf.getBoolVar(ConfVars.HIVE_IN_TEST))
+ && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
+ && !queryProperties.hasSortBy() && !queryProperties.hasPTF()
+ && !queryProperties.usesScript() && !queryProperties.hasMultiDestQuery()
+ && !queryProperties.hasLateralViews()) {
+ runOptiqPlanner = true;
+ } else {
+ LOG.info("Can not invoke CBO; query contains operators not supported for CBO.");
+ }
+
+ return runOptiqPlanner;
+ }
+
+ private class OptiqBasedPlanner implements Frameworks.PlannerAction<RelNode> {
+ RelOptCluster cluster;
+ RelOptSchema relOptSchema;
+ SemanticException semanticException;
+ Map<String, PrunedPartitionList> partitionCache;
+ AtomicInteger noColsMissingStats = new AtomicInteger(0);
+ List<FieldSchema> topLevelFieldSchema;
+
+ // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
+ // just the last one?
+ LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
+ LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameOptiqPosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
+
+ private ASTNode getOptimizedAST(Map<String, PrunedPartitionList> partitionCache)
+ throws SemanticException {
+ ASTNode optiqOptimizedAST = null;
+ RelNode optimizedOptiqPlan = null;
+ this.partitionCache = partitionCache;
+
+ try {
+ optimizedOptiqPlan = Frameworks.withPlanner(this,
+ Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
+ } catch (Exception e) {
+ if (semanticException != null)
+ throw semanticException;
+ else
+ throw new RuntimeException(e);
+ }
+ optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema);
+
+ return optiqOptimizedAST;
+ }
+
+ @Override
+ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) {
+ RelNode optiqGenPlan = null;
+ RelNode optiqPreCboPlan = null;
+ RelNode optiqOptimizedPlan = null;
+
+ /*
+ * recreate cluster, so that it picks up the additional traitDef
+ */
+ RelOptPlanner planner = HiveVolcanoPlanner.createPlanner();
+ final RelOptQuery query = new RelOptQuery(planner);
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder);
+
+ this.cluster = cluster;
+ this.relOptSchema = relOptSchema;
+
+ try {
+ optiqGenPlan = genLogicalPlan(qb);
+ topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(optiqGenPlan),
+ HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
+ } catch (SemanticException e) {
+ semanticException = e;
+ throw new RuntimeException(e);
+ }
+
+ optiqPreCboPlan = applyPreCBOTransforms(optiqGenPlan, HiveDefaultRelMetadataProvider.INSTANCE);
+ List<RelMetadataProvider> list = Lists.newArrayList();
+ list.add(HiveDefaultRelMetadataProvider.INSTANCE);
+ RelTraitSet desiredTraits = cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
+
+ HepProgram hepPgm = null;
+ HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(new ConvertMultiJoinRule(HiveJoinRel.class));
+ hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoinRel.HIVE_JOIN_FACTORY,
+ HiveProjectRel.DEFAULT_PROJECT_FACTORY, HiveFilterRel.DEFAULT_FILTER_FACTORY));
+
+ hepPgm = hepPgmBldr.build();
+ HepPlanner hepPlanner = new HepPlanner(hepPgm);
+
+ hepPlanner.registerMetadataProviders(list);
+ RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+ cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+
+ RelNode rootRel = optiqPreCboPlan;
+ hepPlanner.setRoot(rootRel);
+ if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) {
+ rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits);
+ }
+ hepPlanner.setRoot(rootRel);
+
+ optiqOptimizedPlan = hepPlanner.findBestExp();
+
+ if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
+ LOG.debug("CBO Planning details:\n");
+ LOG.debug("Original Plan:\n");
+ LOG.debug(RelOptUtil.toString(optiqGenPlan));
+ LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n");
+ LOG.debug(RelOptUtil.toString(optiqPreCboPlan));
+ LOG.debug("Plan After Join Reordering:\n");
+ LOG.debug(RelOptUtil.toString(optiqOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES));
+ }
+
+ return optiqOptimizedPlan;
+ }
+
+ public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) {
+
+ // TODO: Decorrelation of subquery should be done before attempting
+ // Partition Pruning; otherwise Expression evaluation may try to execute
+ // correlated sub query.
+ basePlan = hepPlan(basePlan, true, mdProvider, new PushFilterPastProjectRule(
+ FilterRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class,
+ HiveProjectRel.DEFAULT_PROJECT_FACTORY), new PushFilterPastSetOpRule(
+ HiveFilterRel.DEFAULT_FILTER_FACTORY), new MergeFilterRule(
+ HiveFilterRel.DEFAULT_FILTER_FACTORY), HivePushFilterPastJoinRule.JOIN,
+ HivePushFilterPastJoinRule.FILTER_ON_JOIN);
+
+ basePlan = hepPlan(basePlan, false, mdProvider, new TransitivePredicatesOnJoinRule(
+ JoinRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY),
+ // TODO: Enable it after OPTIQ-407 is fixed
+ //RemoveTrivialProjectRule.INSTANCE,
+ new HivePartitionPrunerRule(SemanticAnalyzer.this.conf));
+
+ RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProjectRel.DEFAULT_PROJECT_FACTORY,
+ HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveJoinRel.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY,
+ HiveSortRel.HIVE_SORT_REL_FACTORY, HiveAggregateRel.HIVE_AGGR_REL_FACTORY, HiveUnionRel.UNION_REL_FACTORY);
+ basePlan = fieldTrimmer.trim(basePlan);
+
+ return basePlan;
+ }
+
+ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
+ RelMetadataProvider mdProvider, RelOptRule... rules) {
+
+ RelNode optimizedRelNode = basePlan;
+ HepProgramBuilder programBuilder = new HepProgramBuilder();
+ if (followPlanChanges) {
+ programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN);
+ programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
+ } else {
+ // TODO: Should this be also TOP_DOWN?
+ for (RelOptRule r : rules)
+ programBuilder.addRuleInstance(r);
+ }
+
+ HepPlanner planner = new HepPlanner(programBuilder.build());
+ List<RelMetadataProvider> list = Lists.newArrayList();
+ list.add(mdProvider);
+ planner.registerMetadataProviders(list);
+ RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+ basePlan.getCluster().setMetadataProvider(
+ new CachingRelMetadataProvider(chainedProvider, planner));
+
+ planner.setRoot(basePlan);
+ optimizedRelNode = planner.findBestExp();
+
+ return optimizedRelNode;
+ }
+
+ @SuppressWarnings("nls")
+ private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel,
+ String rightalias, RelNode rightRel) throws SemanticException {
+ HiveUnionRel unionRel = null;
+
+ // 1. Get Row Resolvers, Column map for original left and right input of
+ // Union Rel
+ RowResolver leftRR = this.relToHiveRR.get(leftRel);
+ RowResolver rightRR = this.relToHiveRR.get(rightRel);
+ HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
+ HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
+
+ // 2. Validate that Union is feasible according to Hive (by using type
+ // info from RR)
+ if (leftmap.size() != rightmap.size()) {
+ throw new SemanticException("Schema of both sides of union should match.");
+ }
+
+ ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(
+ qb.getAliases().get(0));
+ for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
+ String field = lEntry.getKey();
+ ColumnInfo lInfo = lEntry.getValue();
+ ColumnInfo rInfo = rightmap.get(field);
+ if (rInfo == null) {
+ throw new SemanticException(generateErrorMessage(tabref,
+ "Schema of both sides of union should match. " + rightalias
+ + " does not have the field " + field));
+ }
+ if (lInfo == null) {
+ throw new SemanticException(generateErrorMessage(tabref,
+ "Schema of both sides of union should match. " + leftalias
+ + " does not have the field " + field));
+ }
+ if (!lInfo.getInternalName().equals(rInfo.getInternalName())) {
+ throw new SemanticException(generateErrorMessage(tabref,
+ "Schema of both sides of union should match: field " + field + ":"
+ + " appears on the left side of the UNION at column position: "
+ + getPositionFromInternalName(lInfo.getInternalName())
+ + ", and on the right side of the UNION at column position: "
+ + getPositionFromInternalName(rInfo.getInternalName())
+ + ". Column positions should match for a UNION"));
+ }
+ // try widening conversion, otherwise fail union
+ TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
+ rInfo.getType());
+ if (commonTypeInfo == null) {
+ throw new SemanticException(generateErrorMessage(tabref,
+ "Schema of both sides of union should match: Column " + field + " is of type "
+ + lInfo.getType().getTypeName() + " on first table and type "
+ + rInfo.getType().getTypeName() + " on second table"));
+ }
+ }
+
+ // 3. construct Union Output RR using original left & right Input
+ RowResolver unionoutRR = new RowResolver();
+ for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
+ String field = lEntry.getKey();
+ ColumnInfo lInfo = lEntry.getValue();
+ ColumnInfo rInfo = rightmap.get(field);
+ ColumnInfo unionColInfo = new ColumnInfo(lInfo);
+ unionColInfo.setTabAlias(unionalias);
+ unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
+ rInfo.getType()));
+ unionoutRR.put(unionalias, field, unionColInfo);
+ }
+
+ // 4. Determine which columns require a cast on left/right input (Optiq
+ // requires exact types on both sides of union)
+ boolean leftNeedsTypeCast = false;
+ boolean rightNeedsTypeCast = false;
+ List<RexNode> leftProjs = new ArrayList<RexNode>();
+ List<RexNode> rightProjs = new ArrayList<RexNode>();
+ List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList();
+ List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList();
+
+ RelDataType leftFieldDT;
+ RelDataType rightFieldDT;
+ RelDataType unionFieldDT;
+ for (int i = 0; i < leftRowDT.size(); i++) {
+ leftFieldDT = leftRowDT.get(i).getType();
+ rightFieldDT = rightRowDT.get(i).getType();
+ if (!leftFieldDT.equals(rightFieldDT)) {
+ unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(),
+ cluster.getTypeFactory());
+ if (!unionFieldDT.equals(leftFieldDT)) {
+ leftNeedsTypeCast = true;
+ }
+ leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
+ cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
+
+ if (!unionFieldDT.equals(rightFieldDT)) {
+ rightNeedsTypeCast = true;
+ }
+ rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
+ cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
+ } else {
+ leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT,
+ cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
+ rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT,
+ cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
+ }
+ }
+
+ // 5. Introduce Project Rel above original left/right inputs if cast is
+ // needed for type parity
+ RelNode unionLeftInput = leftRel;
+ RelNode unionRightInput = rightRel;
+ if (leftNeedsTypeCast) {
+ unionLeftInput = HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType()
+ .getFieldNames());
+ }
+ if (rightNeedsTypeCast) {
+ unionRightInput = HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType()
+ .getFieldNames());
+ }
+
+ // 6. Construct Union Rel
+ ImmutableList.Builder bldr = new ImmutableList.Builder<RelNode>();
+ bldr.add(unionLeftInput);
+ bldr.add(unionRightInput);
+ unionRel = new HiveUnionRel(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+ bldr.build());
+
+ relToHiveRR.put(unionRel, unionoutRR);
+ relToHiveColNameOptiqPosMap.put(unionRel,
+ this.buildHiveToOptiqColumnMap(unionoutRR, unionRel));
+
+ return unionRel;
+ }
+
+ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType,
+ ASTNode joinCond) throws SemanticException {
+ RelNode joinRel = null;
+
+ // 1. construct the RowResolver for the new Join Node by combining row
+ // resolvers from left, right
+ RowResolver leftRR = this.relToHiveRR.get(leftRel);
+ RowResolver rightRR = this.relToHiveRR.get(rightRel);
+ RowResolver joinRR = null;
+
+ if (hiveJoinType != JoinType.LEFTSEMI) {
+ joinRR = RowResolver.getCombinedRR(leftRR, rightRR);
+ } else {
+ joinRR = new RowResolver();
+ RowResolver.add(joinRR, leftRR, 0);
+ }
+
+ // 2. Construct ExpressionNodeDesc representing Join Condition
+ RexNode optiqJoinCond = null;
+ if (joinCond != null) {
+ JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
+ Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
+ jCtx);
+ if (jCtx.getError() != null)
+ throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(),
+ jCtx.getError()));
+
+ ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond);
+
+ List<RelNode> inputRels = new ArrayList<RelNode>();
+ inputRels.add(leftRel);
+ inputRels.add(rightRel);
+ optiqJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels,
+ relToHiveRR, relToHiveColNameOptiqPosMap, false);
+ } else {
+ optiqJoinCond = cluster.getRexBuilder().makeLiteral(true);
+ }
+
+ // 3. Validate that join condition is legal (i.e. no function referring to
+ // both sides of join, only equi join)
+ // TODO: Join filter handling (only supported for OJ by runtime or is it
+ // supported for IJ as well)
+
+ // 4. Construct Join Rel Node
+ boolean leftSemiJoin = false;
+ JoinRelType optiqJoinType;
+ switch (hiveJoinType) {
+ case LEFTOUTER:
+ optiqJoinType = JoinRelType.LEFT;
+ break;
+ case RIGHTOUTER:
+ optiqJoinType = JoinRelType.RIGHT;
+ break;
+ case FULLOUTER:
+ optiqJoinType = JoinRelType.FULL;
+ break;
+ case LEFTSEMI:
+ optiqJoinType = JoinRelType.INNER;
+ leftSemiJoin = true;
+ break;
+ case INNER:
+ default:
+ optiqJoinType = JoinRelType.INNER;
+ break;
+ }
+
+ if (leftSemiJoin) {
+ List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>();
+ List<RexNode> leftJoinKeys = new ArrayList<RexNode>();
+ List<RexNode> rightJoinKeys = new ArrayList<RexNode>();
+
+ RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel,
+ optiqJoinCond, leftJoinKeys, rightJoinKeys, null, null);
+
+ if (!nonEquiConds.isAlwaysTrue()) {
+ throw new SemanticException("Non equality condition not supported in Semi-Join"
+ + nonEquiConds);
+ }
+
+ RelNode[] inputRels = new RelNode[] { leftRel, rightRel };
+ final List<Integer> leftKeys = new ArrayList<Integer>();
+ final List<Integer> rightKeys = new ArrayList<Integer>();
+ optiqJoinCond = HiveOptiqUtil.projectNonColumnEquiConditions(
+ HiveProjectRel.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0,
+ leftKeys, rightKeys);
+
+ joinRel = new SemiJoinRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+ inputRels[0], inputRels[1], optiqJoinCond, ImmutableIntList.copyOf(leftKeys),
+ ImmutableIntList.copyOf(rightKeys));
+ } else {
+ joinRel = HiveJoinRel.getJoin(cluster, leftRel, rightRel, optiqJoinCond, optiqJoinType,
+ leftSemiJoin);
+ }
+ // 5. Add new JoinRel & its RR to the maps
+ relToHiveColNameOptiqPosMap.put(joinRel, this.buildHiveToOptiqColumnMap(joinRR, joinRel));
+ relToHiveRR.put(joinRel, joinRR);
+
+ return joinRel;
+ }
+
+ /**
+  * Generate Join Logical Plan Relnode by walking through the join AST.
+  *
+  * @param joinParseTree
+  *          join AST node; child 0 is the left input, child 1 the right
+  *          input and child 2 the join condition
+  * @param aliasToRel
+  *          Alias(Table/Relation alias) to RelNode; only read and not
+  *          written in to by this method
+  * @return the join RelNode built by genJoinRelNode
+  * @throws SemanticException
+  *           if the join is a UNIQUE JOIN (not supported in CBO) or if
+  *           building the join rel fails
+  */
+ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel)
+     throws SemanticException {
+   RelNode leftRel = null;
+   RelNode rightRel = null;
+   JoinType hiveJoinType = null;
+
+   if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
+     // Plain literal: the message carries no format arguments.
+     String msg = "UNIQUE JOIN is currently not supported in CBO,"
+         + " turn off cbo to use UNIQUE JOIN.";
+     LOG.debug(msg);
+     throw new OptiqSemanticException(msg);
+   }
+
+   // 1. Determine Join Type
+   // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN
+   switch (joinParseTree.getToken().getType()) {
+   case HiveParser.TOK_LEFTOUTERJOIN:
+     hiveJoinType = JoinType.LEFTOUTER;
+     break;
+   case HiveParser.TOK_RIGHTOUTERJOIN:
+     hiveJoinType = JoinType.RIGHTOUTER;
+     break;
+   case HiveParser.TOK_FULLOUTERJOIN:
+     hiveJoinType = JoinType.FULLOUTER;
+     break;
+   case HiveParser.TOK_LEFTSEMIJOIN:
+     hiveJoinType = JoinType.LEFTSEMI;
+     break;
+   default:
+     hiveJoinType = JoinType.INNER;
+     break;
+   }
+
+   // 2. Get Left Table Alias
+   ASTNode left = (ASTNode) joinParseTree.getChild(0);
+   if ((left.getToken().getType() == HiveParser.TOK_TABREF)
+       || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
+       || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
+     String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0))
+         .toLowerCase();
+     // With a single child there is no explicit alias, so the table name is
+     // used; otherwise the alias is taken from the last child.
+     String leftTableAlias = left.getChildCount() == 1 ? tableName : unescapeIdentifier(left
+         .getChild(left.getChildCount() - 1).getText().toLowerCase());
+     // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+     // partitionTableFunctionSource partitioningSpec? expression*)
+     // guaranteed to have an alias here: check done in processJoin
+     leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)
+         ? unescapeIdentifier(left.getChild(1).getText().toLowerCase())
+         : leftTableAlias;
+     leftRel = aliasToRel.get(leftTableAlias);
+   } else if (isJoinToken(left)) {
+     // Nested join on the left side: build it recursively.
+     leftRel = genJoinLogicalPlan(left, aliasToRel);
+   } else {
+     // NOTE(review): with assertions disabled this falls through and
+     // leftRel stays null - confirm that is acceptable to genJoinRelNode.
+     assert (false);
+   }
+
+   // 3. Get Right Table Alias
+   ASTNode right = (ASTNode) joinParseTree.getChild(1);
+   if ((right.getToken().getType() == HiveParser.TOK_TABREF)
+       || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
+       || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
+     String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0))
+         .toLowerCase();
+     String rightTableAlias = right.getChildCount() == 1 ? tableName : unescapeIdentifier(right
+         .getChild(right.getChildCount() - 1).getText().toLowerCase());
+     // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+     // partitionTableFunctionSource partitioningSpec? expression*)
+     // guaranteed to have an alias here: check done in processJoin
+     rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)
+         ? unescapeIdentifier(right.getChild(1).getText().toLowerCase())
+         : rightTableAlias;
+     rightRel = aliasToRel.get(rightTableAlias);
+   } else {
+     // NOTE(review): same as above - rightRel stays null when assertions
+     // are disabled.
+     assert (false);
+   }
+
+   // 4. Get Join Condn
+   ASTNode joinCond = (ASTNode) joinParseTree.getChild(2);
+
+   // 5. Create Join rel
+   return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond);
+ }
+
+ /**
+  * Builds the table scan rel for the table bound to the given alias and
+  * registers its RowResolver and Hive-to-Optiq column position map.
+  *
+  * @param tableAlias
+  *          alias of the table inside the query block
+  * @param qb
+  *          query block that owns the alias
+  * @return the table scan rel
+  * @throws SemanticException
+  *           if a table sample is specified for the alias (not supported in
+  *           CBO) or a SemanticException is raised while building the scan;
+  *           any other exception is rethrown wrapped in a RuntimeException
+  */
+ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
+   RowResolver rr = new RowResolver();
+   HiveTableScanRel tableRel = null;
+
+   try {
+     // 1. If the table has a Sample specified, bail from Optiq path.
+     boolean hasSample = qb.getParseInfo().getTabSample(tableAlias) != null
+         || SemanticAnalyzer.this.nameToSplitSample.containsKey(tableAlias);
+     if (hasSample) {
+       String msg = String.format("Table Sample specified for %s." +
+           " Currently we don't support Table Sample clauses in CBO," +
+           " turn off cbo for queries on tableSamples.", tableAlias);
+       LOG.debug(msg);
+       throw new OptiqSemanticException(msg);
+     }
+
+     // 2. Get Table Metadata
+     Table tab = qb.getMetaData().getSrcForAlias(tableAlias);
+
+     // 3. Get Table Logical Schema (Row Type)
+     // NOTE: Table logical schema = Non Partition Cols + Partition Cols +
+     // Virtual Cols
+     ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
+
+     // 3.1 Add Column info for non partition cols (Object Inspector fields)
+     StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer()
+         .getObjectInspector();
+     List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
+     for (StructField field : fields) {
+       String fieldName = field.getFieldName();
+       ColumnInfo fieldInfo = new ColumnInfo(fieldName,
+           TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
+           tableAlias, false);
+       fieldInfo.setSkewedCol(isSkewedCol(tableAlias, qb, fieldName));
+       rr.put(tableAlias, fieldName, fieldInfo);
+       cInfoLst.add(fieldInfo);
+     }
+     // TODO: Fix this
+     // Snapshot taken before partition and virtual columns are appended.
+     ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
+     ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
+
+     // 3.2 Add column info corresponding to partition columns
+     for (FieldSchema partCol : tab.getPartCols()) {
+       String partColName = partCol.getName();
+       ColumnInfo partColInfo = new ColumnInfo(partColName,
+           TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), tableAlias, true);
+       rr.put(tableAlias, partColName, partColInfo);
+       cInfoLst.add(partColInfo);
+       partitionColumns.add(partColInfo);
+     }
+
+     // 3.3 Add column info corresponding to virtual columns
+     for (Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator(); vcs
+         .hasNext();) {
+       VirtualColumn vc = vcs.next();
+       ColumnInfo vcInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true,
+           vc.getIsHidden());
+       rr.put(tableAlias, vc.getName(), vcInfo);
+       cInfoLst.add(vcInfo);
+     }
+
+     // 3.4 Build row type from field <type, name>
+     RelDataType rowType = TypeConverter.getType(cluster, rr, null);
+
+     // 4. Build RelOptAbstractTable
+     RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, tableAlias, rowType, tab,
+         nonPartitionColumns, partitionColumns, conf, partitionCache, noColsMissingStats);
+
+     // 5. Build Hive Table Scan Rel
+     tableRel = new HiveTableScanRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+         optTable, rowType);
+
+     // 6. Add Schema(RR) to RelNode-Schema map
+     ImmutableMap<String, Integer> hiveToOptiqColMap = buildHiveToOptiqColumnMap(rr, tableRel);
+     relToHiveRR.put(tableRel, rr);
+     relToHiveColNameOptiqPosMap.put(tableRel, hiveToOptiqColMap);
+   } catch (SemanticException se) {
+     // Semantic errors propagate unchanged.
+     throw se;
+   } catch (Exception e) {
+     throw new RuntimeException(e);
+   }
+
+   return tableRel;
+ }
+
+ /**
+  * Builds a filter rel on top of srcRel for the given filter expression.
+  * The filter rel reuses the RowResolver and column position map of srcRel.
+  *
+  * @param filterExpr
+  *          AST of the filter predicate
+  * @param srcRel
+  *          input rel the filter is applied to
+  * @return the new filter rel
+  * @throws SemanticException
+  *           if the predicate cannot be converted
+  */
+ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException {
+   ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel));
+   ImmutableMap<String, Integer> hiveColNameOptiqPosMap = this.relToHiveColNameOptiqPosMap
+       .get(srcRel);
+   RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(),
+       hiveColNameOptiqPosMap, 0, true).convert(filterCondn);
+   RelNode filterRel = new HiveFilterRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+       srcRel, convertedFilterExpr);
+
+   // Register the filter rel once in each map (the position map used to be
+   // written twice with the same value).
+   relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
+   relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap);
+
+   return filterRel;
+ }
+
+ /**
+ * Builds the filter rel for a search condition that may contain a SubQuery
+ * predicate. Without a SubQuery this delegates to
+ * genFilterRelNode(ASTNode, RelNode). With one, the SubQuery plan is
+ * generated, joined to srcRel, filtered, and the result is registered with
+ * the RowResolver and position map that srcRel had on entry.
+ *
+ * @param qb
+ *          query block owning the search condition
+ * @param searchCond
+ *          AST of the search condition
+ * @param srcRel
+ *          input rel
+ * @param aliasToRel
+ *          alias to RelNode map; this method adds entries for the SubQuery
+ *          aliases (and for the having input alias when forHavingClause)
+ * @param forHavingClause
+ *          true when the condition comes from a HAVING clause
+ * @return the resulting rel
+ * @throws SemanticException
+ *           for nested SubQueries, more than one SubQuery expression, or a
+ *           SubQuery with more than one select item where only one is
+ *           allowed
+ */
+ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
+ Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException {
+ /*
+ * Handle Subquery predicates.
+ *
+ * Notes (8/22/14 hb): Why is this a copy of the code from {@link
+ * #genFilterPlan} - for now we will support the same behavior as non CBO
+ * route. - but plan to allow nested SubQueries(Restriction.9.m) and
+ * multiple SubQuery expressions(Restriction.8.m). This requires us to
+ * utilize Optiq's Decorrelation mechanics, and for Optiq to fix/flesh out
+ * Null semantics(OPTIQ-373) - besides only the driving code has been
+ * copied. Most of the code which is SubQueryUtils and QBSubQuery is
+ * reused.
+ */
+ // Column count of the original input; used below to project away the
+ // columns added by the SubQuery join.
+ int numSrcColumns = srcRel.getRowType().getFieldCount();
+ List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
+ if (subQueriesInOriginalTree.size() > 0) {
+
+ /*
+ * Restriction.9.m :: disallow nested SubQuery expressions.
+ */
+ if (qb.getSubQueryPredicateDef() != null) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
+ }
+
+ /*
+ * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
+ */
+ if (subQueriesInOriginalTree.size() > 1) {
+
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
+ }
+
+ /*
+ * Clone the Search AST; apply all rewrites on the clone.
+ */
+ ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
+ List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
+
+ // Remember the outer query's RR and position map; they are re-attached
+ // to the final rel once the SubQuery has been processed.
+ RowResolver inputRR = relToHiveRR.get(srcRel);
+ RowResolver outerQBRR = inputRR;
+ ImmutableMap<String, Integer> outerQBPosMap =
+ relToHiveColNameOptiqPosMap.get(srcRel);
+
+ for (int i = 0; i < subQueries.size(); i++) {
+ ASTNode subQueryAST = subQueries.get(i);
+ ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
+
+ int sqIdx = qb.incrNumSubQueryPredicates();
+ clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
+
+ QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST,
+ originalSubQueryAST, ctx);
+
+ if (!forHavingClause) {
+ qb.setWhereClauseSubQueryPredicate(subQuery);
+ } else {
+ qb.setHavingClauseSubQueryPredicate(subQuery);
+ }
+ String havingInputAlias = null;
+
+ if (forHavingClause) {
+ havingInputAlias = "gby_sq" + sqIdx;
+ aliasToRel.put(havingInputAlias, srcRel);
+ }
+
+ subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias,
+ aliasToRel.keySet());
+
+ // Generate the logical plan of the SubQuery as its own query block.
+ QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
+ qbSQ.setSubQueryDef(subQuery.getSubQuery());
+ Phase1Ctx ctx_1 = initPhase1Ctx();
+ doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
+ getMetaData(qbSQ);
+ RelNode subQueryRelNode = genLogicalPlan(qbSQ);
+ aliasToRel.put(subQuery.getAlias(), subQueryRelNode);
+ RowResolver sqRR = relToHiveRR.get(subQueryRelNode);
+
+ /*
+ * Check.5.h :: For In and Not In the SubQuery must implicitly or
+ * explicitly only contain one select item.
+ */
+ if (subQuery.getOperator().getType() != SubQueryType.EXISTS
+ && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
+ && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST,
+ "SubQuery can contain only 1 item in Select List."));
+ }
+
+ /*
+ * If this is a Not In SubQuery Predicate then Join in the Null Check
+ * SubQuery. See QBSubQuery.NotInCheck for details on why and how this
+ * is constructed.
+ */
+ if (subQuery.getNotInCheck() != null) {
+ QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
+ notInCheck.setSQRR(sqRR);
+ QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
+ qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery());
+ ctx_1 = initPhase1Ctx();
+ doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1);
+ getMetaData(qbSQ_nic);
+ RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic);
+ aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode);
+ srcRel = genJoinRelNode(srcRel, subQueryNICRelNode,
+ // set explicitly to inner until we figure out SemiJoin use
+ // notInCheck.getJoinType(),
+ JoinType.INNER, notInCheck.getJoinConditionAST());
+ inputRR = relToHiveRR.get(srcRel);
+ if (forHavingClause) {
+ aliasToRel.put(havingInputAlias, srcRel);
+ }
+ }
+
+ /*
+ * Gen Join between outer Operator and SQ op
+ */
+ subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
+ srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(),
+ subQuery.getJoinConditionAST());
+ searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
+
+ srcRel = genFilterRelNode(searchCond, srcRel);
+
+ /*
+ * For Not Exists and Not In, add a projection on top of the Left
+ * Outer Join.
+ *
+ * NOTE(review): the two != tests are joined with ||, so this condition
+ * holds for every operator type and the projection is always added,
+ * which does not match the sentence above - confirm the intended
+ * operator before changing it.
+ */
+ if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
+ || subQuery.getOperator().getType() != SubQueryType.NOT_IN) {
+ srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
+ }
+ }
+ // Re-attach the outer query's RR and position map to the final rel.
+ relToHiveRR.put(srcRel, outerQBRR);
+ relToHiveColNameOptiqPosMap.put(srcRel, outerQBPosMap);
+ return srcRel;
+ }
+
+ // No SubQuery in the condition: plain filter.
+ return genFilterRelNode(searchCond, srcRel);
+ }
+
+ /**
+  * Adds a project rel on top of srcRel that references every input field,
+  * and registers it with a RowResolver built from the first numColumns
+  * entries of srcRel's RowResolver.
+  *
+  * @param srcRel
+  *          input rel
+  * @param numColumns
+  *          number of leading columns copied into the output RowResolver
+  * @return the new project rel
+  * @throws SemanticException
+  *           if the output RowResolver cannot be built
+  */
+ private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
+   RowResolver outputRR = new RowResolver();
+   RowResolver.add(outputRR, relToHiveRR.get(srcRel), 0, numColumns);
+
+   RelDataType inputType = srcRel.getRowType();
+   int fieldCount = inputType.getFieldCount();
+   List<RexNode> projections = new ArrayList<RexNode>();
+   List<String> projectionNames = new ArrayList<String>();
+
+   // One input reference per field of the input row type.
+   for (int pos = 0; pos < fieldCount; pos++) {
+     RelDataTypeField field = inputType.getFieldList().get(pos);
+     projections.add(cluster.getRexBuilder().makeInputRef(field.getType(), pos));
+     projectionNames.add(inputType.getFieldNames().get(pos));
+   }
+
+   HiveRel projectRel = HiveProjectRel.create(srcRel, projections, projectionNames);
+
+   relToHiveColNameOptiqPosMap.put(projectRel, buildHiveToOptiqColumnMap(outputRR, projectRel));
+   relToHiveRR.put(projectRel, outputRR);
+   return projectRel;
+ }
+
+ /**
+  * Generates the filter rel for the first WHERE expression of the query
+  * block, if there is one.
+  *
+  * @return the filter rel, or null when the query block has no WHERE
+  *         expression
+  */
+ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel,
+     boolean forHavingClause) throws SemanticException {
+   Iterator<ASTNode> whereExprs = getQBParseInfo(qb).getDestToWhereExpr().values().iterator();
+   if (!whereExprs.hasNext()) {
+     return null;
+   }
+
+   // Child 0 of the where node is the search condition.
+   ASTNode searchCond = (ASTNode) whereExprs.next().getChild(0);
+   return genFilterRelNode(qb, searchCond, srcRel, aliasToRel, forHavingClause);
+ }
+
+ /**
+  * Class to store GenericUDAF related information: the aggregate's
+  * parameter expressions, its return type, the function name and whether
+  * it is a DISTINCT aggregate.
+  */
+ private class AggInfo {
+   // Parameter expressions of the aggregate call.
+   private final List<ExprNodeDesc> m_aggParams;
+   // Return type of the aggregate.
+   private final TypeInfo m_returnType;
+   // Function name of the aggregate.
+   private final String m_udfName;
+   // True for DISTINCT aggregates.
+   private final boolean m_distinct;
+
+   private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName,
+       boolean isDistinct) {
+     this.m_aggParams = aggParams;
+     this.m_returnType = returnType;
+     this.m_udfName = udfName;
+     this.m_distinct = isDistinct;
+   }
+ }
+
+ /**
+  * Converts a Hive aggregate description into an Optiq AggregateCall.
+  * Argument expressions that are not yet part of the child projection list
+  * are appended to gbChildProjLst and recorded in rexNodeToPosMap.
+  *
+  * @param agg
+  *          aggregate description
+  * @param input
+  *          input rel (not used by this method)
+  * @param gbChildProjLst
+  *          projection list of the aggregate's child; may be appended to
+  * @param converter
+  *          converter used for the argument expressions
+  * @param rexNodeToPosMap
+  *          string form of a projected expression to its position; may be
+  *          added to
+  * @param childProjLstIndx
+  *          position assigned to the next appended projection
+  * @return the aggregate call
+  */
+ private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst,
+     RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap,
+     Integer childProjLstIndx) throws SemanticException {
+   RelDataTypeFactory dtFactory = this.cluster.getTypeFactory();
+
+   // 1. Get agg fn ret type in Optiq
+   RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, dtFactory);
+
+   // 2. Convert Agg Fn args and type of args to Optiq
+   // TODO: Does HQL allows expressions as aggregate args or can it only be
+   // projections from child?
+   List<Integer> argList = new ArrayList<Integer>();
+   ImmutableList.Builder<RelDataType> argTypes = new ImmutableList.Builder<RelDataType>();
+   for (ExprNodeDesc param : agg.m_aggParams) {
+     RexNode paramRex = converter.convert(param);
+     String paramKey = paramRex.toString();
+     Integer paramPos = rexNodeToPosMap.get(paramKey);
+     if (paramPos == null) {
+       // Not projected yet: append it and remember where it went.
+       gbChildProjLst.add(paramRex);
+       paramPos = childProjLstIndx;
+       rexNodeToPosMap.put(paramKey, paramPos);
+       childProjLstIndx++;
+     }
+     argList.add(paramPos);
+
+     // TODO: does arg need type cast?
+     argTypes.add(TypeConverter.convert(param.getTypeInfo(), dtFactory));
+   }
+
+   // 3. Get Aggregation FN from Optiq given name, ret type and input arg
+   // type
+   final Aggregation aggregation = SqlFunctionConverter.getOptiqAggFn(agg.m_udfName,
+       argTypes.build(), aggFnRetType);
+
+   return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null);
+ }
+
+ /**
+  * Builds the aggregate rel for the given group by keys and aggregates. A
+  * project rel holding the keys followed by any extra aggregate arguments
+  * is inserted between srcRel and the aggregate.
+  *
+  * @param gbExprs
+  *          group by key expressions
+  * @param aggInfoLst
+  *          aggregates to compute
+  * @param srcRel
+  *          input rel
+  * @return the aggregate rel
+  * @throws SemanticException
+  *           if conversion fails or the aggregate rel is invalid
+  */
+ private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst,
+     RelNode srcRel) throws SemanticException {
+   ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
+   RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
+       posMap, 0, false);
+
+   final List<RexNode> gbChildProjLst = Lists.newArrayList();
+   final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>();
+   final BitSet groupSet = new BitSet();
+   // Group by keys occupy the leading positions of the child projection.
+   int gbIndx = 0;
+   for (ExprNodeDesc key : gbExprs) {
+     RexNode rnd = converter.convert(key);
+     gbChildProjLst.add(rnd);
+     groupSet.set(gbIndx);
+     rexNodeToPosMap.put(rnd.toString(), gbIndx);
+     gbIndx++;
+   }
+
+   List<AggregateCall> aggregateCalls = Lists.newArrayList();
+   for (AggInfo agg : aggInfoLst) {
+     aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap,
+         gbChildProjLst.size()));
+   }
+
+   if (gbChildProjLst.isEmpty()) {
+     // This will happen for count(*), in such cases we arbitrarily pick
+     // first element from srcRel
+     gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0));
+   }
+   RelNode gbInputRel = HiveProjectRel.create(srcRel, gbChildProjLst, null);
+
+   HiveRel aggregateRel = null;
+   try {
+     aggregateRel = new HiveAggregateRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
+         gbInputRel, groupSet, aggregateCalls);
+   } catch (InvalidRelException e) {
+     throw new SemanticException(e);
+   }
+
+   return aggregateRel;
+ }
+
+ /**
+  * Adds an extra (table alias, column alias) entry to the group by
+  * RowResolver when the group by expression is a plain column reference,
+  * either qualified (tab.col) or unqualified (col). Other expression shapes
+  * add nothing.
+  */
+ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
+     RowResolver gByInputRR, RowResolver gByRR) {
+   int exprType = gByExpr.getType();
+
+   if (exprType == HiveParser.DOT
+       && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
+     // Qualified reference: both aliases come straight from the AST.
+     String qualifier = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0)
+         .getText());
+     String column = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText());
+     gByRR.put(qualifier, column, colInfo);
+     return;
+   }
+
+   if (exprType != HiveParser.TOK_TABLE_OR_COL) {
+     return;
+   }
+
+   String column = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText());
+   String qualifier = null;
+   /*
+    * If the input to the GBy has a tab alias for the column, then add an
+    * entry based on that tab_alias. For e.g. this query: select b.x,
+    * count(*) from t1 b group by x needs (tab_alias=b, col_alias=x) in the
+    * GBy RR. tab_alias=b comes from looking at the RowResolver that is the
+    * ancestor before any GBy/ReduceSinks added for the GBY operation.
+    */
+   try {
+     ColumnInfo inputColInfo = gByInputRR.get(qualifier, column);
+     if (inputColInfo != null) {
+       qualifier = inputColInfo.getTabAlias();
+     }
+   } catch (SemanticException ignored) {
+     // Lookup failure leaves the table alias null.
+   }
+   gByRR.put(qualifier, column, colInfo);
+ }
+
+ /**
+  * Registers one group by key: appends its expression and internal output
+  * column name to the given lists and records it in the output
+  * RowResolver, including the alternate alias mapping.
+  */
+ private void addToGBExpr(RowResolver groupByOutputRowResolver,
+     RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc,
+     List<ExprNodeDesc> gbExprNDescLst, List<String> outputColumnNames) {
+   // TODO: Should we use grpbyExprNDesc.getTypeInfo()? what if expr is
+   // UDF
+   // The internal name is derived from the key's position in the key list.
+   String internalName = getColumnInternalName(gbExprNDescLst.size());
+   outputColumnNames.add(internalName);
+   gbExprNDescLst.add(grpbyExprNDesc);
+
+   ColumnInfo keyColInfo = new ColumnInfo(internalName, grpbyExprNDesc.getTypeInfo(), null,
+       false);
+   groupByOutputRowResolver.putExpression(grpbyExpr, keyColInfo);
+
+   addAlternateGByKeyMappings(grpbyExpr, keyColInfo, groupByInputRowResolver,
+       groupByOutputRowResolver);
+ }
+
+ /**
+  * Builds the AggInfo for an aggregate/windowing function AST.
+  *
+  * @param aggAst
+  *          function AST; child 0 is the function name, children
+  *          1..aggFnLstArgIndx are its arguments
+  * @param aggFnLstArgIndx
+  *          index of the last argument child of aggAst
+  * @param inputRR
+  *          RowResolver used to resolve the argument expressions
+  * @return the aggregate description
+  * @throws SemanticException
+  *           if an argument expression or the GenericUDF fallback cannot be
+  *           resolved
+  */
+ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR)
+     throws SemanticException {
+   AggInfo aInfo = null;
+
+   // 1 Convert UDAF Params to ExprNodeDesc
+   ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
+   for (int i = 1; i <= aggFnLstArgIndx; i++) {
+     ASTNode paraExpr = (ASTNode) aggAst.getChild(i);
+     ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR);
+     aggParameters.add(paraExprNode);
+   }
+
+   // 2. Is this distinct UDAF
+   boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI;
+
+   // 3. Determine type of UDAF
+   TypeInfo udafRetType = null;
+
+   // 3.1 Obtain UDAF name
+   String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
+
+   // 3.2 Rank functions type is 'int'/'double'
+   if (FunctionRegistry.isRankingFunction(aggName)) {
+     if (aggName.equalsIgnoreCase("percent_rank")) {
+       udafRetType = TypeInfoFactory.doubleTypeInfo;
+     } else {
+       udafRetType = TypeInfoFactory.intTypeInfo;
+     }
+   } else {
+     // 3.3 Try obtaining UDAF evaluators to determine the ret type
+     try {
+       boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
+
+       // 3.3.1 Get UDAF Evaluator
+       Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
+
+       GenericUDAFEvaluator genericUDAFEvaluator = null;
+       if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME)
+           || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) {
+         ArrayList<ObjectInspector> originalParameterTypeInfos =
+             getWritableObjectInspector(aggParameters);
+         genericUDAFEvaluator =
+             FunctionRegistry.getGenericWindowingEvaluator(aggName,
+                 originalParameterTypeInfos, isDistinct, isAllColumns);
+         GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+         // The evaluator's return type is a list here; the aggregate's type
+         // is taken to be the list's element type.
+         udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo();
+       } else {
+         genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
+             aggParameters, aggAst, isDistinct, isAllColumns);
+         assert (genericUDAFEvaluator != null);
+
+         // 3.3.2 Get UDAF Info using UDAF Evaluator
+         GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+         udafRetType = udaf.returnType;
+       }
+     } catch (Exception e) {
+       // Best effort: fall through to the GenericUDF translation below, but
+       // keep the cause in the debug log instead of dropping it.
+       LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName
+           + ", trying to translate to GenericUDF", e);
+     }
+
+     // 3.4 Try GenericUDF translation
+     if (udafRetType == null) {
+       TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+       // We allow stateful functions in the SELECT list (but nowhere else)
+       tcCtx.setAllowStatefulFunctions(true);
+       tcCtx.setAllowDistinctFunctions(false);
+       ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx);
+       udafRetType = exp.getTypeInfo();
+     }
+   }
+
+   // 4. Construct AggInfo
+   aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct);
+
+   return aInfo;
+ }
+
+ /**
+  * Generate GB plan.
+  *
+  * TODO: 1. Grouping Sets (roll up..)
+  *
+  * @param qb
+  *          query block
+  * @param srcRel
+  *          input rel
+  * @return the aggregate rel, or null when the clause has neither group by
+  *         expressions nor aggregations
+  * @throws SemanticException
+  *           if Grouping Sets/Cube/Rollup are used (not supported in CBO) or
+  *           plan generation fails
+  */
+ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
+   RelNode gbRel = null;
+   QBParseInfo qbp = getQBParseInfo(qb);
+
+   // 0. for GSets, Cube, Rollup, bail from Optiq path.
+   if (!qbp.getDestRollups().isEmpty()
+       || !qbp.getDestGroupingSets().isEmpty()
+       || !qbp.getDestCubes().isEmpty()) {
+     String gbyClause = null;
+     HashMap<String, ASTNode> gbysMap = qbp.getDestToGroupBy();
+     if (gbysMap.size() == 1) {
+       ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue();
+       gbyClause = SemanticAnalyzer.this.ctx.getTokenRewriteStream()
+           .toString(gbyAST.getTokenStartIndex(),
+               gbyAST.getTokenStopIndex());
+       // Leading space keeps the clause apart from "Rollup" in the message.
+       gbyClause = " in '" + gbyClause + "'.";
+     } else {
+       gbyClause = ".";
+     }
+     String msg = String.format("Encountered Grouping Set/Cube/Rollup%s"
+         + " Currently we don't support Grouping Set/Cube/Rollup"
+         + " clauses in CBO," + " turn off cbo for these queries.",
+         gbyClause);
+     LOG.debug(msg);
+     throw new OptiqSemanticException(msg);
+   }
+
+   // 1. Gather GB Expressions (AST) (GB + Aggregations)
+   // NOTE: Multi Insert is not supported
+   String detsClauseName = qbp.getClauseNames().iterator().next();
+   List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
+   HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
+   boolean hasGrpByAstExprs = grpByAstExprs != null && !grpByAstExprs.isEmpty();
+   boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
+
+   if (hasGrpByAstExprs || hasAggregationTrees) {
+     ArrayList<ExprNodeDesc> gbExprNDescLst = new ArrayList<ExprNodeDesc>();
+     ArrayList<String> outputColumnNames = new ArrayList<String>();
+
+     // 2. Input, Output Row Resolvers
+     RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel);
+     RowResolver groupByOutputRowResolver = new RowResolver();
+     groupByOutputRowResolver.setIsExprResolver(true);
+
+     if (hasGrpByAstExprs) {
+       // 3. Construct GB Keys (ExprNode)
+       for (int i = 0; i < grpByAstExprs.size(); ++i) {
+         ASTNode grpbyExpr = grpByAstExprs.get(i);
+         Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
+             grpbyExpr, new TypeCheckCtx(groupByInputRowResolver));
+         ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr);
+         if (grpbyExprNDesc == null) {
+           throw new RuntimeException("Invalid Column Reference: " + grpbyExpr.dump());
+         }
+
+         addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr,
+             grpbyExprNDesc, gbExprNDescLst, outputColumnNames);
+       }
+     }
+
+     // 4. Construct aggregation function Info
+     ArrayList<AggInfo> aggregations = new ArrayList<AggInfo>();
+     if (hasAggregationTrees) {
+       assert (aggregationTrees != null);
+       for (ASTNode value : aggregationTrees.values()) {
+         // 4.1 Determine type of UDAF
+         // This is the GenericUDAF name
+         String aggName = unescapeIdentifier(value.getChild(0).getText());
+         boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
+         boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
+
+         // 4.2 Convert UDAF Params to ExprNodeDesc
+         ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
+         for (int i = 1; i < value.getChildCount(); i++) {
+           ASTNode paraExpr = (ASTNode) value.getChild(i);
+           ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
+           aggParameters.add(paraExprNode);
+         }
+
+         Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
+         GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
+             aggParameters, value, isDistinct, isAllColumns);
+         assert (genericUDAFEvaluator != null);
+         GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+         AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct);
+         aggregations.add(aInfo);
+         // Aggregate output columns are numbered after the group by keys.
+         String field = getColumnInternalName(gbExprNDescLst.size() + aggregations.size() - 1);
+         outputColumnNames.add(field);
+         groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType,
+             "", false));
+       }
+     }
+
+     gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel);
+     relToHiveColNameOptiqPosMap.put(gbRel,
+         buildHiveToOptiqColumnMap(groupByOutputRowResolver, gbRel));
+     this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
+   }
+
+   return gbRel;
+ }
+
+ /**
+ * Generates the sort rel for the ORDER BY of the first clause of the query
+ * block. Order by expressions that are not plain input references are
+ * added through a child project rel below the sort, and a parent project
+ * rel on top of the sort restores the fields of srcRel.
+ *
+ * @param qb
+ *          query block
+ * @param srcRel
+ *          input rel
+ * @return the sort rel (or the project rel on top of it), or null when the
+ *         clause has no ORDER BY
+ * @throws SemanticException
+ *           in strict mode when ORDER BY is used without LIMIT, or when an
+ *           order by expression cannot be resolved
+ */
+ private RelNode genOBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
+ RelNode relToRet = null;
+
+ QBParseInfo qbp = getQBParseInfo(qb);
+ String dest = qbp.getClauseNames().iterator().next();
+ ASTNode obAST = qbp.getOrderByForClause(dest);
+
+ if (obAST != null) {
+ // 1. OB Expr sanity test
+ // in strict mode, in the presence of order by, limit must be specified
+ Integer limit = qb.getParseInfo().getDestLimit(dest);
+ if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")
+ && limit == null) {
+ throw new SemanticException(generateErrorMessage(obAST,
+ ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg()));
+ }
+
+ // 2. Walk through OB exprs and extract field collations and additional
+ // virtual columns needed
+ final List<RexNode> newVCLst = new ArrayList<RexNode>();
+ final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
+ int fieldIndex = 0;
+
+ List<Node> obASTExprLst = obAST.getChildren();
+ ASTNode obASTExpr;
+ // NOTE(review): vcASTTypePairs is filled in below but never read in
+ // this method.
+ List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
+ RowResolver inputRR = relToHiveRR.get(srcRel);
+ RowResolver outputRR = new RowResolver();
+
+ RexNode rnd;
+ RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
+ relToHiveColNameOptiqPosMap.get(srcRel), 0, false);
+ int srcRelRecordSz = srcRel.getRowType().getFieldCount();
+
+ for (int i = 0; i < obASTExprLst.size(); i++) {
+ // 2.1 Convert AST Expr to ExprNode
+ obASTExpr = (ASTNode) obASTExprLst.get(i);
+ Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
+ obASTExpr, new TypeCheckCtx(inputRR));
+ // The expression itself is child 0 of the sort-direction node.
+ ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0));
+ if (obExprNDesc == null)
+ throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
+
+ // 2.2 Convert ExprNode to RexNode
+ rnd = converter.convert(obExprNDesc);
+
+ // 2.3 Determine the index of ob expr in child schema
+ // NOTE: Optiq can not take compound exprs in OB without it being
+ // present in the child (& hence we add a child Project Rel)
+ if (rnd instanceof RexInputRef) {
+ fieldIndex = ((RexInputRef) rnd).getIndex();
+ } else {
+ // New columns are placed after all fields of srcRel.
+ fieldIndex = srcRelRecordSz + newVCLst.size();
+ newVCLst.add(rnd);
+ vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>((ASTNode) obASTExpr.getChild(0),
+ obExprNDesc.getTypeInfo()));
+ }
+
+ // 2.4 Determine the Direction of order by
+ // Descending unless the node is TOK_TABSORTCOLNAMEASC.
+ org.eigenbase.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
+ if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
+ order = RelFieldCollation.Direction.ASCENDING;
+ }
+
+ // 2.5 Add to field collations
+ fieldCollations.add(new RelFieldCollation(fieldIndex, order));
+ }
+
+ // 3. Add Child Project Rel if needed
+ RelNode obInputRel = srcRel;
+ if (!newVCLst.isEmpty()) {
+ List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
+ new Function<RelDataTypeField, RexNode>() {
+ @Override
+ public RexNode apply(RelDataTypeField input) {
+ return new RexInputRef(input.getIndex(), input.getType());
+ }
+ });
+
+ obInputRel = HiveProjectRel.create(srcRel, CompositeList.of(originalInputRefs, newVCLst),
+ null);
+ }
+
+ // 4. Construct SortRel
+ RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION);
+ RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
+ // TODO: Is it better to introduce a
+ // project on top to restrict VC from showing up in sortRel type
+ RelNode sortRel = new HiveSortRel(cluster, traitSet, obInputRel, canonizedCollation,
+ null, null);
+
+ // 5. Construct OB Parent Rel If needed
+ // Construct a parent Project if OB has virtual columns(vc) otherwise
+ // vc would show up in the result
+ // TODO: If OB is part of sub query & Parent Query select is not of the
+ // type "select */.*..." then parent project is not needed
+ relToRet = sortRel;
+ if (!newVCLst.isEmpty()) {
+ List<RexNode> obParentRelProjs = Lists.transform(srcRel.getRowType().getFieldList(),
+ new Function<RelDataTypeField, RexNode>() {
+ @Override
+ public RexNode apply(RelDataTypeField input) {
+ return new RexInputRef(input.getIndex(), input.getType());
+ }
+ });
+
+ relToRet = HiveProjectRel.create(sortRel, obParentRelProjs, null);
+ }
+
+ // 6. Construct output RR
+ RowResolver.add(outputRR, inputRR, 0);
+
+ // 7. Update the maps
+ // NOTE: Output RR for SortRel is considered same as its input; we may
+ // end up not using VC that is present in sort rel. Also note that
+ // rowtype of sortrel is the type of its child; if child happens to be
+ // synthetic project that we introduced then that projectrel would
+ // contain the vc.
+ ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR,
+ relToRet);
+ relToHiveRR.put(relToRet, outputRR);
+ relToHiveColNameOptiqPosMap.put(relToRet, hiveColNameOptiqPosMap);
+ }
+
+ return relToRet;
+ }
+
+ /**
+  * Builds the relational node for a LIMIT clause, if the query block has one.
+  *
+  * The limit is looked up for the first clause name of the query block. When a
+  * limit exists, a {@link HiveSortRel} with an empty collation and the limit as
+  * its fetch expression is placed on top of {@code srcRel}, and its row
+  * resolver and column-position map are registered.
+  *
+  * @param qb query block being planned
+  * @param srcRel input relational node
+  * @return the new sort/limit node, or {@code null} when no limit is specified
+  * @throws SemanticException propagated from the helpers invoked here
+  */
+ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
+   QBParseInfo parseInfo = getQBParseInfo(qb);
+   Integer rowLimit = parseInfo.getDestToLimit().get(
+       parseInfo.getClauseNames().iterator().next());
+
+   // No LIMIT on this query block: nothing to add.
+   if (rowLimit == null) {
+     return null;
+   }
+
+   // A limit is modelled as a sort with no sort keys and a fetch count.
+   RexNode fetchCount = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(rowLimit));
+   RelTraitSet hiveTraits = cluster.traitSetOf(HiveRel.CONVENTION);
+   RelCollation emptyCollation = hiveTraits.canonize(RelCollationImpl.EMPTY);
+   HiveRel limitRel = new HiveSortRel(cluster, hiveTraits, srcRel, emptyCollation, null,
+       fetchCount);
+
+   // The limit node exposes the same columns as its input.
+   RowResolver limitRR = new RowResolver();
+   RowResolver.add(limitRR, relToHiveRR.get(srcRel), 0);
+   ImmutableMap<String, Integer> colNameToPos = buildHiveToOptiqColumnMap(limitRR, limitRel);
+   relToHiveRR.put(limitRel, limitRR);
+   relToHiveColNameOptiqPosMap.put(limitRel, colNameToPos);
+
+   return limitRel;
+ }
+
+ /**
+  * Converts the expressions of a window PARTITION BY spec into Optiq
+  * {@link RexNode}s.
+  *
+  * @param ps partition spec; may be {@code null}
+  * @param converter converter used to turn Hive expression descriptors into RexNodes
+  * @param inputRR row resolver against which the partition expressions are resolved
+  * @return one RexNode per partition expression, in order; empty when {@code ps} is null
+  * @throws SemanticException if an expression cannot be resolved
+  */
+ List<RexNode> getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, RowResolver inputRR)
+     throws SemanticException {
+   List<RexNode> partitionRexNodes = new ArrayList<RexNode>();
+   if (ps == null) {
+     return partitionRexNodes;
+   }
+
+   for (PartitionExpression partExpr : ps.getExpressions()) {
+     // A fresh type-check context per expression, with stateful functions allowed.
+     TypeCheckCtx typeCtx = new TypeCheckCtx(inputRR);
+     typeCtx.setAllowStatefulFunctions(true);
+     ExprNodeDesc partExprDesc = genExprNodeDesc(partExpr.getExpression(), inputRR, typeCtx);
+     partitionRexNodes.add(converter.convert(partExprDesc));
+   }
+
+   return partitionRexNodes;
+ }
+
+ /**
+  * Converts the expressions of a window ORDER BY spec into Optiq
+  * {@link RexFieldCollation}s.
+  *
+  * @param os order spec; may be {@code null}
+  * @param converter converter used to turn Hive expression descriptors into RexNodes
+  * @param inputRR row resolver against which the order expressions are resolved
+  * @return one collation per order expression, in order; empty when {@code os} is null
+  * @throws SemanticException if an expression cannot be resolved
+  */
+ List<RexFieldCollation> getOrderKeys(OrderSpec os, RexNodeConverter converter,
+     RowResolver inputRR) throws SemanticException {
+   List<RexFieldCollation> collations = new ArrayList<RexFieldCollation>();
+   if (os == null) {
+     return collations;
+   }
+
+   for (OrderExpression orderExpr : os.getExpressions()) {
+     TypeCheckCtx typeCtx = new TypeCheckCtx(inputRR);
+     typeCtx.setAllowStatefulFunctions(true);
+     ExprNodeDesc orderExprDesc = genExprNodeDesc(orderExpr.getExpression(), inputRR, typeCtx);
+     RexNode orderRex = converter.convert(orderExprDesc);
+
+     // Only a descending order is flagged; ascending is the empty flag set.
+     Set<SqlKind> directionFlags = new HashSet<SqlKind>();
+     if (orderExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) {
+       directionFlags.add(SqlKind.DESCENDING);
+     }
+     collations.add(new RexFieldCollation(orderRex, directionFlags));
+   }
+
+   return collations;
+ }
+
+ /**
+  * Translates a Hive window-frame boundary into an Optiq {@link RexWindowBound}.
+  *
+  * A boundary amount of {@code 0} is treated as "no amount": PRECEDING and
+  * FOLLOWING then become UNBOUNDED PRECEDING / UNBOUNDED FOLLOWING. For any
+  * other amount, a bounded PRECEDING/FOLLOWING call carrying an INTEGER literal
+  * is produced. CURRENT always maps to CURRENT ROW.
+  *
+  * @param bs boundary spec; may be {@code null}
+  * @param converter not used by this method
+  * @return the translated bound, or {@code null} when {@code bs} is null or its
+  *         direction is not one of PRECEDING, CURRENT, FOLLOWING
+  */
+ RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) {
+   if (bs == null) {
+     return null;
+   }
+
+   SqlParserPos pos = new SqlParserPos(1, 1);
+
+   // amt/amtLiteral stay null when the amount is 0, which selects the
+   // "unbounded" variants below.
+   SqlNode amt = null;
+   RexNode amtLiteral = null;
+   if (bs.getAmt() != 0) {
+     amt = SqlLiteral.createExactNumeric(String.valueOf(bs.getAmt()), new SqlParserPos(2, 2));
+     amtLiteral = cluster.getRexBuilder().makeLiteral(Integer.valueOf(bs.getAmt()),
+         cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true);
+   }
+
+   RexWindowBound rwb = null;
+   SqlCall sc;
+   switch (bs.getDirection()) {
+   case PRECEDING:
+     if (amt == null) {
+       rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null);
+     } else {
+       sc = (SqlCall) SqlWindow.createPreceding(amt, pos);
+       rwb = RexWindowBound.create(sc,
+           cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
+     }
+     break;
+
+   case CURRENT:
+     rwb = RexWindowBound.create(SqlWindow.createCurrentRow(pos), null);
+     break;
+
+   case FOLLOWING:
+     if (amt == null) {
+       rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(pos), null);
+     } else {
+       sc = (SqlCall) SqlWindow.createFollowing(amt, pos);
+       rwb = RexWindowBound.create(sc,
+           cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
+     }
+     break;
+   }
+
+   return rwb;
+ }
+
+ /**
+  * Locates the window specification among the children of a windowing AST node.
+  *
+  * Only the last child is examined, and only when it is not also the first
+  * child (i.e. the node has at least two children).
+  *
+  * @param wndAST AST node of the windowing expression
+  * @return index of the last child if it is a {@code TOK_WINDOWSPEC}; otherwise -1
+  */
+ int getWindowSpecIndx(ASTNode wndAST) {
+   int lastChildIdx = wndAST.getChildCount() - 1;
+   boolean endsWithWindowSpec = lastChildIdx > 0
+       && wndAST.getChild(lastChildIdx).getType() == HiveParser.TOK_WINDOWSPEC;
+   return endsWithWindowSpec ? lastChildIdx : -1;
+ }
+
+ /**
+  * Builds the Optiq OVER expression for a single Hive windowing expression.
+  *
+  * @param qb query block being planned (not used by this method)
+  * @param wExpSpec windowing expression; must be a {@link WindowFunctionSpec}
+  * @param srcRel input relational node whose row type and row resolver are
+  *          used to resolve the function arguments and window keys
+  * @return pair of the OVER expression and the Hive return type of the
+  *         windowing function
+  * @throws SemanticException propagated from argument/key resolution
+  * @throws RuntimeException if {@code wExpSpec} is not a WindowFunctionSpec
+  */
+ Pair<RexNode, TypeInfo> genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, RelNode srcRel)
+     throws SemanticException {
+   if (!(wExpSpec instanceof WindowFunctionSpec)) {
+     // TODO: Convert to Semantic Exception
+     throw new RuntimeException("Unsupported window Spec");
+   }
+
+   // 1. Locate the window spec inside the function's AST
+   WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec;
+   ASTNode windowProjAst = wFnSpec.getExpression();
+   RowResolver inputRR = this.relToHiveRR.get(srcRel);
+   // TODO: do we need to get to child?
+   int wndSpecASTIndx = getWindowSpecIndx(windowProjAst);
+
+   // 2. Get Hive Aggregate Info
+   AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, inputRR);
+
+   // 3. Get Optiq Return type for Agg Fn
+   TypeInfo wHiveRetType = hiveAggInfo.m_returnType;
+   RelDataType optiqAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType,
+       this.cluster.getTypeFactory());
+
+   // 4. Convert Agg Fn args to Optiq
+   ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
+   RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
+       posMap, 0, false);
+   Builder<RexNode> optiqAggFnArgsBldr = ImmutableList.<RexNode> builder();
+   Builder<RelDataType> optiqAggFnArgsTypeBldr = ImmutableList.<RelDataType> builder();
+   for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) {
+     optiqAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i)));
+     optiqAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i)
+         .getTypeInfo(), this.cluster.getTypeFactory()));
+   }
+   ImmutableList<RexNode> optiqAggFnArgs = optiqAggFnArgsBldr.build();
+   ImmutableList<RelDataType> optiqAggFnArgsType = optiqAggFnArgsTypeBldr.build();
+
+   // 5. Get Optiq Agg Fn
+   final SqlAggFunction optiqAggFn = SqlFunctionConverter.getOptiqAggFn(hiveAggInfo.m_udfName,
+       optiqAggFnArgsType, optiqAggFnRetType);
+
+   // 6. Translate Window spec
+   WindowSpec wndSpec = wFnSpec.getWindowSpec();
+   List<RexNode> partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR);
+   List<RexFieldCollation> orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR);
+   // NOTE(review): the frame start is bound to upperBound and the frame end to
+   // lowerBound, and they are handed to makeOver in (lowerBound, upperBound)
+   // order. Preserved as-is; confirm that consumers of the resulting window
+   // expect this mapping before changing it.
+   RexWindowBound upperBound = getBound(wndSpec.windowFrame.start, converter);
+   RexWindowBound lowerBound = getBound(wndSpec.windowFrame.end, converter);
+   boolean isRows = (wndSpec.windowFrame.start instanceof RangeBoundarySpec)
+       || (wndSpec.windowFrame.end instanceof RangeBoundarySpec);
+
+   RexNode w = cluster.getRexBuilder().makeOver(optiqAggFnRetType, optiqAggFn, optiqAggFnArgs,
+       partitionKeys, ImmutableList.<RexFieldCollation> copyOf(orderKeys), lowerBound,
+       upperBound, isRows, true, false);
+
+   return new Pair<RexNode, TypeInfo>(w, wHiveRetType);
+ }
+
+ /**
+  * Generates a project node that appends the query block's windowing
+  * expressions to the columns of {@code srcRel}.
+  *
+  * Only the first windowing spec of the query block is considered. Window
+  * expressions already present in the output row resolver are not added again.
+  *
+  * @param qb query block being planned
+  * @param srcRel input relational node
+  * @return the new project node, or {@code null} when the query block has no
+  *         windowing spec or the spec has no window expressions
+  * @throws SemanticException propagated from the helpers invoked here
+  */
+ private RelNode genSelectForWindowing(QB qb, RelNode srcRel) throws SemanticException {
+   // The result is not used here; the call is retained in case the helper
+   // validates the query block. TODO confirm and drop if it is a pure getter.
+   getQBParseInfo(qb);
+
+   WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
+       .values().iterator().next() : null;
+   if (wSpec == null) {
+     return null;
+   }
+
+   // 1. Get valid Window Function Spec
+   wSpec.validateAndMakeEffective();
+   List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
+   if (windowExpressions == null || windowExpressions.isEmpty()) {
+     return null;
+   }
+
+   RowResolver inputRR = this.relToHiveRR.get(srcRel);
+   // 2. Get RexNodes for original Projections from below
+   List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
+       HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
+
+   // 3. Construct new Row Resolver with everything from below.
+   RowResolver out_rwsch = new RowResolver();
+   RowResolver.add(out_rwsch, inputRR, 0);
+
+   // 4. Walk through Window Expressions & Construct RexNodes for those,
+   // Update out_rwsch
+   for (WindowExpressionSpec wExprSpec : windowExpressions) {
+     if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
+       Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
+       projsForWindowSelOp.add(wtp.getKey());
+
+       // 4.1 Update Output Row Schema; the internal name is derived from the
+       // projection count after the new projection has been added.
+       ColumnInfo oColInfo = new ColumnInfo(
+           getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
+       out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
+     }
+   }
+
+   // 5. Build the project node carrying the original and windowing columns
+   return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
+ }
+
+ /**
+  * Creates a project node over {@code srcRel} for the given projections and
+  * registers its row resolver and column-position map.
+  *
+  * @param optiqColLst projection expressions, in output order
+  * @param out_rwsch row resolver describing the output of the new project
+  * @param srcRel input relational node
+  * @return the newly created project node
+  * @throws OptiqSemanticException propagated from the helpers invoked here
+  */
+ private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch,
+     RelNode srcRel) throws OptiqSemanticException {
+   // 1. Build output field names
+   // TODO: Should this be external names
+   /*
+    * Hive treats names that start with '_c' as internalNames; so each
+    * generated internal name is prefixed with '_o_' so we don't run into this
+    * issue when converting back to Hive AST.
+    */
+   int projCount = optiqColLst.size();
+   List<String> prefixedFieldNames = new ArrayList<String>(projCount);
+   for (int idx = 0; idx < projCount; idx++) {
+     prefixedFieldNames.add("_o_" + getColumnInternalName(idx));
+   }
+
+   // 2. Build Optiq Rel Node for project using converted projections & col
+   // names
+   HiveRel projectRel = HiveProjectRel.create(srcRel, optiqColLst, prefixedFieldNames);
+
+   // 3. Keep track of colname-to-posmap && RR for new select
+   ImmutableMap<String, Integer> colNameToPos = buildHiveToOptiqColumnMap(out_rwsch,
+       projectRel);
+   this.relToHiveColNameOptiqPosMap.put(projectRel, colNameToPos);
+   this.relToHiveRR.put(projectRel, out_rwsch);
+
+   return projectRel;
+ }
+
+ /**
+ * NOTE: there can only be one select clause since we don't handle multi
+ * destination insert.
+ *
+ * @throws SemanticException
+ */
+ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
+
+ // 0. Generate a Select Node for Windowing
+ RelNode selForWindow = genSelectForWindowing(qb, srcRel);
+ srcRel = (selForWindow == null) ? srcRel : selForWindow;
+
+ boolean subQuery;
+ ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
+ ArrayList<Pair<Integer, RexNode>> windowingRexNodes = new ArrayList<Pair<Integer, RexNode>>();
+
+ // 1. Get Select Expression List
+ QBParseInfo qbp = getQBParseInfo(qb);
+ String selClauseName = qbp.getClauseNames().iterator().next();
+ ASTNode selExprList = qbp.getSelForClause(selClauseName);
+
+ // 2.Row resolvers for input, output
+ RowResolver out_rwsch = new RowResolver();
+ ASTNode trfm = null;
+ Integer pos = Integer.valueOf(0);
+ RowResolver inputRR = this.relToHiveRR.get(srcRel);
+
+ // 3. Query Hints
+ // TODO: Handle Query Hints; currently we ignore them
+ boolean selectStar = false;
+ int posn = 0;
+ boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
+ if (hintPresent) {
+ String hint = SemanticAnalyzer.this.ctx.getTokenRewriteStream().
+ toString(
+ selExprList.getChild(0).getTokenStartIndex(),
+ selExprList.getChild(0).getTokenStopIndex());
+ String msg = String.format("Hint specified for %s."
+ + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint);
+ LOG.debug(msg);
+ throw new OptiqSemanticException(msg);
+ }
+
+ // 4. Determine if select corresponds to a subquery
+ subQuery = qb.getParseInfo().getIsSubQ();
+
+ // 4. Bailout if select involves Transform
+ boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
+ if (isInTransform) {
+ String msg = String.format("SELECT TRANSFORM is currently not supported in CBO,"
+ + " turn off cbo to use TRANSFORM.");
+ LOG.debug(msg);
+ throw new OptiqSemanticException(msg);
+ }
+
+ // 5. Bailout if select involves UDTF
+ ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
+ GenericUDTF genericUDTF = null;
+ int udtfExprType = udtfExpr.getType();
+ if (udtfExprType == HiveParser.TOK_FUNCTION || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
+ String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(udtfExpr, true);
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+ if (fi != null) {
+ genericUDTF = fi.getGenericUDTF();
+ }
+ if (genericUDTF != null) {
+ String msg = String.format("UDTF " + funcName + " is currently not supported in CBO,"
+ + " turn off cbo to use UDTF " + funcName);
+ LOG.debug(msg);
+ throw new OptiqSemanticException(msg);
+ }
+ }
+
+ // 6. Iterate over all expression (after SELECT)
+ ASTNode exprList = selExprList;
+ int startPosn = posn;
+ int wndProjPos = 0;
+ List<String> tabAliasesForAllProjs = getTabAliases(inputRR);
+ for (int i = startPosn; i < exprList.getChildCount(); ++i) {
+
+ // 6.1 child can be EXPR AS ALIAS, or EXPR.
+ ASTNode child = (ASTNode) exprList.getChild(i);
+ boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
+
+ // 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's
+ // This check is not needed and invalid when there is a transform b/c
+ // the
+ // AST's are slightly different.
+ if (child.getChildCount() > 2) {
+ throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2),
+ ErrorMsg.INVALID_AS.getMsg()));
+ }
+
+ ASTNode expr;
+ String tabAlias;
+ String colAlias;
+
+ // 6.3 Get rid of TOK_SELEXPR
+ expr = (ASTNode) child.getChild(0);
+ String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
+ autogenColAliasPrfxIncludeFuncName, i);
+ tabAlias = colRef[0];
+ colAlias = colRef[1];
+
+ // 6.4 Build ExprNode corresponding to columns
+ if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+ pos = genColListRegex(".*",
+ expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0))
+ .toLowerCase(), expr, col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs,
+ subQuery);
+ selectStar = true;
+ } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
+ && !inputRR.getIsExprResolver()
+ && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
+ // In case the expression is a regex COL.
+ // This can only happen without AS clause
+ // We don't allow this for ExprResolver - the Group By case
+ pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr,
+ col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs, subQuery);
+ } else if (expr.getType() == HiveParser.DOT
+ && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
+ && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
[... 309 lines stripped ...]