You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/06 05:44:26 UTC
svn commit: r1629562 [9/38] - in /hive/branches/spark: ./ accumulo-handler/
beeline/ beeline/src/java/org/apache/hive/beeline/ bin/ext/ common/
common/src/java/org/apache/hadoop/hive/conf/
common/src/test/org/apache/hadoop/hive/common/type/ contrib/src...
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1629562&r1=1629561&r2=1629562&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Oct 6 03:44:13 2014
@@ -22,10 +22,8 @@ import static org.apache.hadoop.hive.con
import java.io.IOException;
import java.io.Serializable;
-import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -36,18 +34,12 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
-import net.hydromatic.optiq.SchemaPlus;
-import net.hydromatic.optiq.tools.Frameworks;
-
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
-import org.antlr.runtime.tree.TreeVisitor;
-import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.commons.lang.StringUtils;
@@ -115,29 +107,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
-import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider;
-import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
-import org.apache.hadoop.hive.ql.optimizer.optiq.HiveTypeSystemImpl;
-import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
-import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel;
-import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule;
-import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule;
-import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter;
-import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory;
-import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx;
-import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter;
-import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter;
-import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
@@ -223,74 +192,12 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.InputFormat;
-import org.eigenbase.rel.AggregateCall;
-import org.eigenbase.rel.AggregateRelBase;
-import org.eigenbase.rel.Aggregation;
-import org.eigenbase.rel.FilterRelBase;
-import org.eigenbase.rel.InvalidRelException;
-import org.eigenbase.rel.JoinRelBase;
-import org.eigenbase.rel.JoinRelType;
-import org.eigenbase.rel.RelCollation;
-import org.eigenbase.rel.RelCollationImpl;
-import org.eigenbase.rel.RelFactories;
-import org.eigenbase.rel.RelFieldCollation;
-import org.eigenbase.rel.RelNode;
-import org.eigenbase.rel.metadata.CachingRelMetadataProvider;
-import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
-import org.eigenbase.rel.metadata.RelMetadataProvider;
-import org.eigenbase.rel.rules.ConvertMultiJoinRule;
-import org.eigenbase.rel.rules.FilterAggregateTransposeRule;
-import org.eigenbase.rel.rules.LoptOptimizeJoinRule;
-import org.eigenbase.rel.rules.MergeFilterRule;
-import org.eigenbase.rel.rules.PushFilterPastProjectRule;
-import org.eigenbase.rel.rules.PushFilterPastSetOpRule;
-import org.eigenbase.rel.rules.SemiJoinRel;
-import org.eigenbase.rel.rules.TransitivePredicatesOnJoinRule;
-import org.eigenbase.relopt.RelOptCluster;
-import org.eigenbase.relopt.RelOptPlanner;
-import org.eigenbase.relopt.RelOptQuery;
-import org.eigenbase.relopt.RelOptRule;
-import org.eigenbase.relopt.RelOptSchema;
-import org.eigenbase.relopt.RelOptUtil;
-import org.eigenbase.relopt.RelTraitSet;
-import org.eigenbase.relopt.hep.HepMatchOrder;
-import org.eigenbase.relopt.hep.HepPlanner;
-import org.eigenbase.relopt.hep.HepProgram;
-import org.eigenbase.relopt.hep.HepProgramBuilder;
-import org.eigenbase.reltype.RelDataType;
-import org.eigenbase.reltype.RelDataTypeFactory;
-import org.eigenbase.reltype.RelDataTypeField;
-import org.eigenbase.rex.RexBuilder;
-import org.eigenbase.rex.RexInputRef;
-import org.eigenbase.rex.RexNode;
-import org.eigenbase.rex.RexWindowBound;
-import org.eigenbase.rex.RexFieldCollation;
-import org.eigenbase.sql.SqlAggFunction;
-import org.eigenbase.sql.SqlWindow;
-import org.eigenbase.sql.parser.SqlParserPos;
-import org.eigenbase.sql.type.SqlTypeName;
-import org.eigenbase.sql2rel.RelFieldTrimmer;
-import org.eigenbase.sql.SqlCall;
-import org.eigenbase.sql.SqlExplainLevel;
-import org.eigenbase.sql.SqlKind;
-import org.eigenbase.sql.SqlNode;
-import org.eigenbase.sql.SqlLiteral;
-import org.eigenbase.util.CompositeList;
-import org.eigenbase.util.ImmutableIntList;
-import org.eigenbase.util.Pair;
-
-import com.google.common.base.Function;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableList.Builder;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
/**
* Implementation of the semantic analyzer. It generates the query plan.
@@ -358,9 +265,6 @@ public class SemanticAnalyzer extends Ba
//flag for partial scan during analyze ... compute statistics
protected boolean partialscan;
- private volatile boolean runCBO = true;
- private volatile boolean disableJoinMerge = false;
-
/*
* Capture the CTE definitions in a Query.
*/
@@ -375,11 +279,6 @@ public class SemanticAnalyzer extends Ba
int nextNum;
}
- protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException {
- this(conf);
- this.runCBO = runCBO;
- }
-
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
@@ -416,11 +315,8 @@ public class SemanticAnalyzer extends Ba
}
@Override
- protected void reset(boolean clearPartsCache) {
- super.reset(true);
- if(clearPartsCache) {
- prunedPartitions.clear();
- }
+ protected void reset() {
+ super.reset();
loadTableWork.clear();
loadFileWork.clear();
topOps.clear();
@@ -434,7 +330,7 @@ public class SemanticAnalyzer extends Ba
smbMapJoinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
- disableJoinMerge = false;
+ prunedPartitions.clear();
aliasToCTEs.clear();
topToTable.clear();
opToPartPruner.clear();
@@ -448,6 +344,8 @@ public class SemanticAnalyzer extends Ba
viewsExpanded = null;
viewSelect = null;
ctesExpanded = null;
+ noscan = false;
+ partialscan = false;
globalLimitCtx.disableOpt();
viewAliasToInput.clear();
reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
@@ -456,6 +354,7 @@ public class SemanticAnalyzer extends Ba
unparseTranslator.clear();
queryProperties.clear();
outputs.clear();
+ globalLimitCtx.reset();
}
public void initParseCtx(ParseContext pctx) {
@@ -644,10 +543,6 @@ public class SemanticAnalyzer extends Ba
public static String generateErrorMessage(ASTNode ast, String message) {
StringBuilder sb = new StringBuilder();
- if (ast == null) {
- sb.append("The abstract syntax tree is null");
- return sb.toString();
- }
sb.append(ast.getLine());
sb.append(":");
sb.append(ast.getCharPositionInLine());
@@ -1062,7 +957,9 @@ public class SemanticAnalyzer extends Ba
private boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
- || isOuterJoinToken(node)
+ || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
@@ -1071,12 +968,6 @@ public class SemanticAnalyzer extends Ba
return false;
}
- private boolean isOuterJoinToken(ASTNode node) {
- return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
- }
-
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
@@ -1094,7 +985,6 @@ public class SemanticAnalyzer extends Ba
"Join with multiple children"));
}
- queryProperties.incrementJoinCount(isOuterJoinToken(join));
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
@@ -1201,15 +1091,10 @@ public class SemanticAnalyzer extends Ba
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
- int posn = 0;
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
qbp.setHints((ASTNode) ast.getChild(0));
- posn++;
}
- if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
- queryProperties.setUsesScript(true);
-
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
@@ -1220,8 +1105,6 @@ public class SemanticAnalyzer extends Ba
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
- if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
- queryProperties.setFilterWithSubQuery(true);
break;
case HiveParser.TOK_INSERT_INTO:
@@ -1244,9 +1127,6 @@ public class SemanticAnalyzer extends Ba
}
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
-
- if (qbp.getClauseNamesForDest().size() > 1)
- queryProperties.setMultiDestQuery(true);
break;
case HiveParser.TOK_FROM:
@@ -1270,9 +1150,9 @@ public class SemanticAnalyzer extends Ba
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
- queryProperties.setHasLateralViews(true);
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
+ queryProperties.setHasJoin(true);
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
@@ -1485,10 +1365,6 @@ public class SemanticAnalyzer extends Ba
}
}
- public Table getTable(TableScanOperator ts) {
- return topToTable.get(ts);
- }
-
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, null);
}
@@ -1547,20 +1423,11 @@ public class SemanticAnalyzer extends Ba
}
// Disallow INSERT INTO on bucketized tables
- boolean isAcid = isAcidTable(tab);
if (qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) &&
- tab.getNumBuckets() > 0 && !isAcid) {
+ tab.getNumBuckets() > 0 && !isAcidTable(tab)) {
throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
getMsg("Table: " + tab_name));
}
- // Disallow update and delete on non-acid tables
- if ((updating() || deleting()) && !isAcid) {
- // isAcidTable above also checks for whether we are using an acid compliant
- // transaction manager. But that has already been caught in
- // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
- // here, it means the table itself doesn't support it.
- throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, tab_name);
- }
// We check offline of the table, as if people only select from an
// non-existing partition of an offline table, the partition won't
@@ -1639,10 +1506,6 @@ public class SemanticAnalyzer extends Ba
qb.getParseInfo().addTableSpec(alias, ts);
}
-
- ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
- PlanUtils.addInput(inputs,
- new ReadEntity(tab, parentViewInfo, parentViewInfo == null));
}
LOG.info("Get metadata for subqueries");
@@ -6393,7 +6256,6 @@ public class SemanticAnalyzer extends Ba
LOG.info("Modifying config values for ACID write");
conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, false);
conf.setBoolVar(ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true);
- conf.set(AcidUtils.CONF_ACID_KEY, "true");
}
/**
@@ -8030,7 +7892,7 @@ public class SemanticAnalyzer extends Ba
List<ASTNode> nodeConds = node.getExpressions().get(i + 1);
ArrayList<ASTNode> reordereNodeConds = new ArrayList<ASTNode>();
for(int k=0; k < tgtToNodeExprMap.length; k++) {
- reordereNodeConds.add(nodeConds.get(tgtToNodeExprMap[k]));
+ reordereNodeConds.add(nodeConds.get(k));
}
expr.add(reordereNodeConds);
}
@@ -9582,9 +9444,7 @@ public class SemanticAnalyzer extends Ba
aliasToOpInfo );
}
}
-
- if (!disableJoinMerge)
- mergeJoinTree(qb);
+ mergeJoinTree(qb);
}
// if any filters are present in the join tree, push them on top of the
@@ -9792,9 +9652,9 @@ public class SemanticAnalyzer extends Ba
}
@Override
- public void init(boolean clearPartsCache) {
+ public void init() {
// clear most members
- reset(clearPartsCache);
+ reset();
// init
QB qb = new QB(null, null, false);
@@ -9849,82 +9709,11 @@ public class SemanticAnalyzer extends Ba
getMetaData(qb);
LOG.info("Completed getting MetaData in Semantic Analysis");
-
- if (runCBO) {
- boolean tokenTypeIsQuery = ast.getToken().getType() == HiveParser.TOK_QUERY
- || ast.getToken().getType() == HiveParser.TOK_EXPLAIN;
- if (!tokenTypeIsQuery || createVwDesc != null
- || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)
- || !canHandleQuery(qb, true) || !HiveOptiqUtil.validateASTForCBO(ast)) {
- runCBO = false;
- }
-
- if (runCBO) {
- disableJoinMerge = true;
- }
- }
-
// Save the result schema derived from the sink operator produced
// by genPlan. This has the correct column names, which clients
// such as JDBC would prefer instead of the c0, c1 we'll end
// up with later.
- Operator sinkOp = null;
-
- if (runCBO) {
- OptiqBasedPlanner optiqPlanner = new OptiqBasedPlanner();
- boolean reAnalyzeAST = false;
-
- try {
- // 1. Gen Optimized AST
- ASTNode newAST = optiqPlanner.getOptimizedAST(prunedPartitions);
-
- // 2. Regen OP plan from optimized AST
- init(false);
- ctx_1 = initPhase1Ctx();
- if (!doPhase1(newAST, qb, ctx_1)) {
- throw new RuntimeException(
- "Couldn't do phase1 on CBO optimized query plan");
- }
- // unfortunately making prunedPartitions immutable is not possible here
- // with SemiJoins not all tables are costed in CBO,
- // so their PartitionList is not evaluated until the run phase.
- //prunedPartitions = ImmutableMap.copyOf(prunedPartitions);
- getMetaData(qb);
-
- disableJoinMerge = true;
- sinkOp = genPlan(qb);
- LOG.info("CBO Succeeded; optimized logical plan.");
- LOG.debug(newAST.dump());
-
- /*
- * Use non CBO Result Set Schema so as to preserve user specified names.
- * Hive seems to have bugs with OB/LIMIT in sub queries. // 3. Reset
- * result set schema resultSchema =
- * convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp)
- * .getRowResolver(), true);
- */
- } catch (Exception e) {
- LOG.error("CBO failed, skipping CBO. ", e);
- if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) ||
- (optiqPlanner.noColsMissingStats.get() > 0) ||
- e instanceof OptiqSemanticException) {
- reAnalyzeAST = true;
- } else {
- throw e instanceof SemanticException ? (SemanticException) e : new SemanticException(e);
- }
- } finally {
- runCBO = false;
- disableJoinMerge = false;
- if (reAnalyzeAST) {
- init(true);
- prunedPartitions.clear();
- analyzeInternal(ast);
- return;
- }
- }
- } else {
- sinkOp = genPlan(qb);
- }
+ Operator sinkOp = genPlan(qb);
if (createVwDesc != null)
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
@@ -12164,1946 +11953,4 @@ public class SemanticAnalyzer extends Ba
return false;
}
- /**** Temporary Place Holder For Optiq plan Gen, Optimizer ****/
-
- /*
- * Entry point to Optimizations using Optiq.
- */
- private boolean canHandleQuery(QB qbToChk, boolean topLevelQB) {
- boolean runOptiqPlanner = false;
- // Assumption:
- // 1. If top level QB is query then everything below it must also be Query
- // 2. Nested Subquery will return false for qbToChk.getIsQuery()
- if ((!topLevelQB || qbToChk.getIsQuery())
- && (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"))
- && (!topLevelQB || (queryProperties.getJoinCount() > 1) || conf.getBoolVar(ConfVars.HIVE_IN_TEST))
- && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
- && !queryProperties.hasSortBy() && !queryProperties.hasPTF()
- && !queryProperties.usesScript() && !queryProperties.hasMultiDestQuery()
- && !queryProperties.hasLateralViews()) {
- runOptiqPlanner = true;
- } else {
- LOG.info("Can not invoke CBO; query contains operators not supported for CBO.");
- }
-
- return runOptiqPlanner;
- }
-
- private class OptiqBasedPlanner implements Frameworks.PlannerAction<RelNode> {
- RelOptCluster cluster;
- RelOptSchema relOptSchema;
- SemanticException semanticException;
- Map<String, PrunedPartitionList> partitionCache;
- AtomicInteger noColsMissingStats = new AtomicInteger(0);
- List<FieldSchema> topLevelFieldSchema;
-
- // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
- // just last one.
- LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
- LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameOptiqPosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
-
- private ASTNode getOptimizedAST(Map<String, PrunedPartitionList> partitionCache)
- throws SemanticException {
- ASTNode optiqOptimizedAST = null;
- RelNode optimizedOptiqPlan = null;
- this.partitionCache = partitionCache;
-
- try {
- optimizedOptiqPlan = Frameworks.withPlanner(this,
- Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
- } catch (Exception e) {
- if (semanticException != null)
- throw semanticException;
- else
- throw new RuntimeException(e);
- }
- optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema);
-
- return optiqOptimizedAST;
- }
-
- @Override
- public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) {
- RelNode optiqGenPlan = null;
- RelNode optiqPreCboPlan = null;
- RelNode optiqOptimizedPlan = null;
-
- /*
- * recreate cluster, so that it picks up the additional traitDef
- */
- RelOptPlanner planner = HiveVolcanoPlanner.createPlanner();
- final RelOptQuery query = new RelOptQuery(planner);
- final RexBuilder rexBuilder = cluster.getRexBuilder();
- cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder);
-
- this.cluster = cluster;
- this.relOptSchema = relOptSchema;
-
- try {
- optiqGenPlan = genLogicalPlan(qb);
- topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(optiqGenPlan),
- HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
- } catch (SemanticException e) {
- semanticException = e;
- throw new RuntimeException(e);
- }
-
- optiqPreCboPlan = applyPreCBOTransforms(optiqGenPlan, HiveDefaultRelMetadataProvider.INSTANCE);
- List<RelMetadataProvider> list = Lists.newArrayList();
- list.add(HiveDefaultRelMetadataProvider.INSTANCE);
- RelTraitSet desiredTraits = cluster.traitSetOf(HiveRel.CONVENTION, RelCollationImpl.EMPTY);
-
- HepProgram hepPgm = null;
- HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP)
- .addRuleInstance(new ConvertMultiJoinRule(HiveJoinRel.class));
- hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoinRel.HIVE_JOIN_FACTORY,
- HiveProjectRel.DEFAULT_PROJECT_FACTORY, HiveFilterRel.DEFAULT_FILTER_FACTORY));
-
- hepPgm = hepPgmBldr.build();
- HepPlanner hepPlanner = new HepPlanner(hepPgm);
-
- hepPlanner.registerMetadataProviders(list);
- RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
- cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
-
- RelNode rootRel = optiqPreCboPlan;
- hepPlanner.setRoot(rootRel);
- if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) {
- rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits);
- }
- hepPlanner.setRoot(rootRel);
-
- optiqOptimizedPlan = hepPlanner.findBestExp();
-
- if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
- LOG.debug("CBO Planning details:\n");
- LOG.debug("Original Plan:\n");
- LOG.debug(RelOptUtil.toString(optiqGenPlan));
- LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n");
- LOG.debug(RelOptUtil.toString(optiqPreCboPlan));
- LOG.debug("Plan After Join Reordering:\n");
- LOG.debug(RelOptUtil.toString(optiqOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES));
- }
-
- return optiqOptimizedPlan;
- }
-
- public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) {
-
- // TODO: Decorelation of subquery should be done before attempting
- // Partition Pruning; otherwise Expression evaluation may try to execute
- // corelated sub query.
- basePlan = hepPlan(basePlan, true, mdProvider, new PushFilterPastProjectRule(
- FilterRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class,
- HiveProjectRel.DEFAULT_PROJECT_FACTORY), new PushFilterPastSetOpRule(
- HiveFilterRel.DEFAULT_FILTER_FACTORY), new MergeFilterRule(
- HiveFilterRel.DEFAULT_FILTER_FACTORY), HivePushFilterPastJoinRule.JOIN,
- HivePushFilterPastJoinRule.FILTER_ON_JOIN,
- new FilterAggregateTransposeRule(
- FilterRelBase.class,
- HiveFilterRel.DEFAULT_FILTER_FACTORY,
- AggregateRelBase.class));
-
- basePlan = hepPlan(basePlan, false, mdProvider, new TransitivePredicatesOnJoinRule(
- JoinRelBase.class, HiveFilterRel.DEFAULT_FILTER_FACTORY),
- // TODO: Enable it after OPTIQ-407 is fixed
- //RemoveTrivialProjectRule.INSTANCE,
- new HivePartitionPrunerRule(SemanticAnalyzer.this.conf));
-
- RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProjectRel.DEFAULT_PROJECT_FACTORY,
- HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveJoinRel.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY,
- HiveSortRel.HIVE_SORT_REL_FACTORY, HiveAggregateRel.HIVE_AGGR_REL_FACTORY, HiveUnionRel.UNION_REL_FACTORY);
- basePlan = fieldTrimmer.trim(basePlan);
-
- basePlan = hepPlan(basePlan, true, mdProvider,
- new PushFilterPastProjectRule(FilterRelBase.class,
- HiveFilterRel.DEFAULT_FILTER_FACTORY, HiveProjectRel.class,
- HiveProjectRel.DEFAULT_PROJECT_FACTORY));
-
- return basePlan;
- }
-
- private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, RelOptRule... rules) {
-
- RelNode optimizedRelNode = basePlan;
- HepProgramBuilder programBuilder = new HepProgramBuilder();
- if (followPlanChanges) {
- programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN);
- programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
- } else {
- // TODO: Should this be also TOP_DOWN?
- for (RelOptRule r : rules)
- programBuilder.addRuleInstance(r);
- }
-
- HepPlanner planner = new HepPlanner(programBuilder.build());
- List<RelMetadataProvider> list = Lists.newArrayList();
- list.add(mdProvider);
- planner.registerMetadataProviders(list);
- RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
- basePlan.getCluster().setMetadataProvider(
- new CachingRelMetadataProvider(chainedProvider, planner));
-
- planner.setRoot(basePlan);
- optimizedRelNode = planner.findBestExp();
-
- return optimizedRelNode;
- }
-
- @SuppressWarnings("nls")
- private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel,
- String rightalias, RelNode rightRel) throws SemanticException {
- HiveUnionRel unionRel = null;
-
- // 1. Get Row Resolvers, Column map for original left and right input of
- // Union Rel
- RowResolver leftRR = this.relToHiveRR.get(leftRel);
- RowResolver rightRR = this.relToHiveRR.get(rightRel);
- HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
- HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
-
- // 2. Validate that Union is feasible according to Hive (by using type
- // info from RR)
- if (leftmap.size() != rightmap.size()) {
- throw new SemanticException("Schema of both sides of union should match.");
- }
-
- ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(
- qb.getAliases().get(0));
- for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
- String field = lEntry.getKey();
- ColumnInfo lInfo = lEntry.getValue();
- ColumnInfo rInfo = rightmap.get(field);
- if (rInfo == null) {
- throw new SemanticException(generateErrorMessage(tabref,
- "Schema of both sides of union should match. " + rightalias
- + " does not have the field " + field));
- }
- if (lInfo == null) {
- throw new SemanticException(generateErrorMessage(tabref,
- "Schema of both sides of union should match. " + leftalias
- + " does not have the field " + field));
- }
- if (!lInfo.getInternalName().equals(rInfo.getInternalName())) {
- throw new SemanticException(generateErrorMessage(tabref,
- "Schema of both sides of union should match: field " + field + ":"
- + " appears on the left side of the UNION at column position: "
- + getPositionFromInternalName(lInfo.getInternalName())
- + ", and on the right side of the UNION at column position: "
- + getPositionFromInternalName(rInfo.getInternalName())
- + ". Column positions should match for a UNION"));
- }
- // try widening coversion, otherwise fail union
- TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
- rInfo.getType());
- if (commonTypeInfo == null) {
- throw new SemanticException(generateErrorMessage(tabref,
- "Schema of both sides of union should match: Column " + field + " is of type "
- + lInfo.getType().getTypeName() + " on first table and type "
- + rInfo.getType().getTypeName() + " on second table"));
- }
- }
-
- // 3. construct Union Output RR using original left & right Input
- RowResolver unionoutRR = new RowResolver();
- for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) {
- String field = lEntry.getKey();
- ColumnInfo lInfo = lEntry.getValue();
- ColumnInfo rInfo = rightmap.get(field);
- ColumnInfo unionColInfo = new ColumnInfo(lInfo);
- unionColInfo.setTabAlias(unionalias);
- unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
- rInfo.getType()));
- unionoutRR.put(unionalias, field, unionColInfo);
- }
-
- // 4. Determine which columns requires cast on left/right input (Optiq
- // requires exact types on both sides of union)
- boolean leftNeedsTypeCast = false;
- boolean rightNeedsTypeCast = false;
- List<RexNode> leftProjs = new ArrayList<RexNode>();
- List<RexNode> rightProjs = new ArrayList<RexNode>();
- List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList();
- List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList();
-
- RelDataType leftFieldDT;
- RelDataType rightFieldDT;
- RelDataType unionFieldDT;
- for (int i = 0; i < leftRowDT.size(); i++) {
- leftFieldDT = leftRowDT.get(i).getType();
- rightFieldDT = rightRowDT.get(i).getType();
- if (!leftFieldDT.equals(rightFieldDT)) {
- unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(),
- cluster.getTypeFactory());
- if (!unionFieldDT.equals(leftFieldDT)) {
- leftNeedsTypeCast = true;
- }
- leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
- cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
-
- if (!unionFieldDT.equals(rightFieldDT)) {
- rightNeedsTypeCast = true;
- }
- rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
- cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
- } else {
- leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT,
- cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
- rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT,
- cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
- }
- }
-
- // 5. Introduce Project Rel above original left/right inputs if cast is
- // needed for type parity
- RelNode unionLeftInput = leftRel;
- RelNode unionRightInput = rightRel;
- if (leftNeedsTypeCast) {
- unionLeftInput = HiveProjectRel.create(leftRel, leftProjs, leftRel.getRowType()
- .getFieldNames());
- }
- if (rightNeedsTypeCast) {
- unionRightInput = HiveProjectRel.create(rightRel, rightProjs, rightRel.getRowType()
- .getFieldNames());
- }
-
- // 6. Construct Union Rel
- ImmutableList.Builder bldr = new ImmutableList.Builder<RelNode>();
- bldr.add(unionLeftInput);
- bldr.add(unionRightInput);
- unionRel = new HiveUnionRel(cluster, TraitsUtil.getDefaultTraitSet(cluster),
- bldr.build());
-
- relToHiveRR.put(unionRel, unionoutRR);
- relToHiveColNameOptiqPosMap.put(unionRel,
- this.buildHiveToOptiqColumnMap(unionoutRR, unionRel));
-
- return unionRel;
- }
-
- /**
-  * Creates the Optiq join node for two already planned inputs and registers
-  * the RowResolver and the Hive-column-to-Optiq-position map of the new node.
-  * <p>
-  * For {@code JoinType.LEFTSEMI} a {@code SemiJoinRel} is created and the
-  * registered RowResolver only carries the columns of the left input; every
-  * other join type goes through {@code HiveJoinRel.getJoin}.
-  *
-  * @param leftRel
-  *          left input of the join
-  * @param rightRel
-  *          right input of the join
-  * @param hiveJoinType
-  *          Hive join type; types other than the outer and semi variants are
-  *          planned as an inner join
-  * @param joinCond
-  *          AST of the join condition; {@code null} results in a literal TRUE
-  *          condition
-  * @return the new join node
-  * @throws SemanticException
-  *           if type checking of the join condition reports an error, or a
-  *           semi join carries a non-equality condition
-  */
- private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType,
-     ASTNode joinCond) throws SemanticException {
-   RelNode joinRel = null;
-
-   // 1. construct the RowResolver for the new Join Node by combining row
-   // resolvers from left, right
-   RowResolver leftRR = this.relToHiveRR.get(leftRel);
-   RowResolver rightRR = this.relToHiveRR.get(rightRel);
-   RowResolver joinRR = null;
-
-   if (hiveJoinType != JoinType.LEFTSEMI) {
-     joinRR = RowResolver.getCombinedRR(leftRR, rightRR);
-   } else {
-     // the semi join RowResolver is built from the left input only
-     joinRR = new RowResolver();
-     RowResolver.add(joinRR, leftRR, 0);
-   }
-
-   // 2. Construct ExpressionNodeDesc representing Join Condition
-   RexNode optiqJoinCond = null;
-   if (joinCond != null) {
-     JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
-     Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
-         jCtx);
-     if (jCtx.getError() != null) {
-       throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(),
-           jCtx.getError()));
-     }
-
-     ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond);
-
-     List<RelNode> inputRels = new ArrayList<RelNode>();
-     inputRels.add(leftRel);
-     inputRels.add(rightRel);
-     optiqJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels,
-         relToHiveRR, relToHiveColNameOptiqPosMap, false);
-   } else {
-     optiqJoinCond = cluster.getRexBuilder().makeLiteral(true);
-   }
-
-   // 3. Validate that join condition is legal (i.e no function refering to
-   // both sides of join, only equi join)
-   // TODO: Join filter handling (only supported for OJ by runtime or is it
-   // supported for IJ as well)
-
-   // 4. Construct Join Rel Node
-   boolean leftSemiJoin = false;
-   JoinRelType optiqJoinType;
-   switch (hiveJoinType) {
-   case LEFTOUTER:
-     optiqJoinType = JoinRelType.LEFT;
-     break;
-   case RIGHTOUTER:
-     optiqJoinType = JoinRelType.RIGHT;
-     break;
-   case FULLOUTER:
-     optiqJoinType = JoinRelType.FULL;
-     break;
-   case LEFTSEMI:
-     optiqJoinType = JoinRelType.INNER;
-     leftSemiJoin = true;
-     break;
-   case INNER:
-   default:
-     optiqJoinType = JoinRelType.INNER;
-     break;
-   }
-
-   if (leftSemiJoin) {
-     List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>();
-     List<RexNode> leftJoinKeys = new ArrayList<RexNode>();
-     List<RexNode> rightJoinKeys = new ArrayList<RexNode>();
-
-     RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel,
-         optiqJoinCond, leftJoinKeys, rightJoinKeys, null, null);
-
-     if (!nonEquiConds.isAlwaysTrue()) {
-       // separator added so the offending condition is readable in the message
-       throw new SemanticException("Non equality condition not supported in Semi-Join: "
-           + nonEquiConds);
-     }
-
-     RelNode[] inputRels = new RelNode[] { leftRel, rightRel };
-     final List<Integer> leftKeys = new ArrayList<Integer>();
-     final List<Integer> rightKeys = new ArrayList<Integer>();
-     optiqJoinCond = HiveOptiqUtil.projectNonColumnEquiConditions(
-         HiveProjectRel.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0,
-         leftKeys, rightKeys);
-
-     // inputRels is read back here, after projectNonColumnEquiConditions
-     // received the array
-     joinRel = new SemiJoinRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
-         inputRels[0], inputRels[1], optiqJoinCond, ImmutableIntList.copyOf(leftKeys),
-         ImmutableIntList.copyOf(rightKeys));
-   } else {
-     joinRel = HiveJoinRel.getJoin(cluster, leftRel, rightRel, optiqJoinCond, optiqJoinType,
-         leftSemiJoin);
-   }
-   // 5. Add new JoinRel & its RR to the maps
-   relToHiveColNameOptiqPosMap.put(joinRel, this.buildHiveToOptiqColumnMap(joinRR, joinRel));
-   relToHiveRR.put(joinRel, joinRR);
-
-   return joinRel;
- }
-
- /**
-  * Generate Join Logical Plan Relnode by walking through the join AST.
-  *
-  * @param joinParseTree
-  *          AST of the join; child 0 is the left input, child 1 the right
-  *          input and child 2 the join condition
-  * @param aliasToRel
-  *          Alias(Table/Relation alias) to RelNode; only read and not
-  *          written in to by this method
-  * @return the join node produced by {@code genJoinRelNode}
-  * @throws SemanticException
-  *           for UNIQUE JOIN, for a join input that is neither a table
-  *           reference, sub query, PTF nor (left side only) a nested join, or
-  *           when building the join node fails
-  */
- private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel)
-     throws SemanticException {
-   if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
-     String msg = "UNIQUE JOIN is currently not supported in CBO,"
-         + " turn off cbo to use UNIQUE JOIN.";
-     LOG.debug(msg);
-     throw new OptiqSemanticException(msg);
-   }
-
-   // 1. Determine Join Type
-   // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN
-   JoinType hiveJoinType;
-   switch (joinParseTree.getToken().getType()) {
-   case HiveParser.TOK_LEFTOUTERJOIN:
-     hiveJoinType = JoinType.LEFTOUTER;
-     break;
-   case HiveParser.TOK_RIGHTOUTERJOIN:
-     hiveJoinType = JoinType.RIGHTOUTER;
-     break;
-   case HiveParser.TOK_FULLOUTERJOIN:
-     hiveJoinType = JoinType.FULLOUTER;
-     break;
-   case HiveParser.TOK_LEFTSEMIJOIN:
-     hiveJoinType = JoinType.LEFTSEMI;
-     break;
-   default:
-     hiveJoinType = JoinType.INNER;
-     break;
-   }
-
-   // 2. Resolve the left input: a leaf looked up by alias or a nested join
-   ASTNode left = (ASTNode) joinParseTree.getChild(0);
-   RelNode leftRel;
-   if (isJoinLeafInput(left)) {
-     leftRel = aliasToRel.get(getJoinInputAlias(left));
-   } else if (isJoinToken(left)) {
-     leftRel = genJoinLogicalPlan(left, aliasToRel);
-   } else {
-     // was "assert (false)": without -ea that left leftRel null
-     throw new OptiqSemanticException("Unsupported left input of join in CBO: " + left.dump());
-   }
-
-   // 3. Resolve the right input: only leaf inputs are accepted here
-   ASTNode right = (ASTNode) joinParseTree.getChild(1);
-   RelNode rightRel;
-   if (isJoinLeafInput(right)) {
-     rightRel = aliasToRel.get(getJoinInputAlias(right));
-   } else {
-     // was "assert (false)": without -ea that left rightRel null
-     throw new OptiqSemanticException("Unsupported right input of join in CBO: "
-         + right.dump());
-   }
-
-   // 4. Get Join Condn
-   ASTNode joinCond = (ASTNode) joinParseTree.getChild(2);
-
-   // 5. Create Join rel
-   return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond);
- }
-
- /** Tells whether a join input is a table reference, a sub query or a PTF. */
- private boolean isJoinLeafInput(ASTNode joinInput) {
-   int tokenType = joinInput.getToken().getType();
-   return tokenType == HiveParser.TOK_TABREF || tokenType == HiveParser.TOK_SUBQUERY
-       || tokenType == HiveParser.TOK_PTBLFUNCTION;
- }
-
- /** Computes the lower-cased alias under which a leaf join input is registered. */
- private String getJoinInputAlias(ASTNode joinInput) throws SemanticException {
-   String tableName = getUnescapedUnqualifiedTableName((ASTNode) joinInput.getChild(0))
-       .toLowerCase();
-   String alias = joinInput.getChildCount() == 1 ? tableName : unescapeIdentifier(joinInput
-       .getChild(joinInput.getChildCount() - 1).getText().toLowerCase());
-   // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
-   // partitionTableFunctionSource partitioningSpec? expression*)
-   // guaranteed to have an alias here: check done in processJoin
-   if (joinInput.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
-     alias = unescapeIdentifier(joinInput.getChild(1).getText().toLowerCase());
-   }
-   return alias;
- }
-
- /**
-  * Builds the table scan node for one table alias of the query block and
-  * registers its RowResolver and column-position map.
-  * <p>
-  * The row type is assembled from the deserializer's fields, followed by the
-  * partition columns and the registered virtual columns.
-  *
-  * @param tableAlias
-  *          alias of the table inside {@code qb}
-  * @param qb
-  *          query block owning the alias
-  * @return the table scan node
-  * @throws SemanticException
-  *           when the alias carries a table sample (not handled on the CBO
-  *           path) or a SemanticException is raised while building the scan;
-  *           any other exception is rethrown wrapped in a RuntimeException
-  */
- private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
-   HiveTableScanRel scanRel = null;
-   RowResolver scanRR = new RowResolver();
-
-   try {
-     // 1. Table samples are not planned through Optiq
-     boolean sampled = qb.getParseInfo().getTabSample(tableAlias) != null
-         || SemanticAnalyzer.this.nameToSplitSample.containsKey(tableAlias);
-     if (sampled) {
-       String msg = String.format("Table Sample specified for %s." +
-           " Currently we don't support Table Sample clauses in CBO," +
-           " turn off cbo for queries on tableSamples.", tableAlias);
-       LOG.debug(msg);
-       throw new OptiqSemanticException(msg);
-     }
-
-     // 2. Table metadata
-     Table tab = qb.getMetaData().getSrcForAlias(tableAlias);
-
-     // 3. Logical schema = non partition cols + partition cols + virtual cols
-
-     // 3.1 Non partition columns, taken from the deserializer's inspector
-     StructObjectInspector rowOI = (StructObjectInspector) tab.getDeserializer()
-         .getObjectInspector();
-     ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>();
-     for (StructField field : rowOI.getAllStructFieldRefs()) {
-       String fieldName = field.getFieldName();
-       ColumnInfo fieldInfo = new ColumnInfo(fieldName,
-           TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
-           tableAlias, false);
-       fieldInfo.setSkewedCol(isSkewedCol(tableAlias, qb, fieldName));
-       scanRR.put(tableAlias, fieldName, fieldInfo);
-       nonPartitionColumns.add(fieldInfo);
-     }
-
-     // 3.2 Partition columns
-     ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
-     for (FieldSchema partCol : tab.getPartCols()) {
-       String partName = partCol.getName();
-       ColumnInfo partInfo = new ColumnInfo(partName,
-           TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), tableAlias, true);
-       scanRR.put(tableAlias, partName, partInfo);
-       partitionColumns.add(partInfo);
-     }
-
-     // 3.3 Virtual columns
-     for (Iterator<VirtualColumn> vcIt = VirtualColumn.getRegistry(conf).iterator(); vcIt
-         .hasNext();) {
-       VirtualColumn vc = vcIt.next();
-       ColumnInfo vcInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true,
-           vc.getIsHidden());
-       scanRR.put(tableAlias, vc.getName(), vcInfo);
-     }
-
-     // 3.4 Row type built from the RowResolver
-     RelDataType rowType = TypeConverter.getType(cluster, scanRR, null);
-
-     // 4. RelOptAbstractTable, named "db.table" when a db name is present
-     String dbName = tab.getDbName();
-     String fullyQualifiedTabName;
-     if (dbName == null || dbName.isEmpty()) {
-       fullyQualifiedTabName = tab.getTableName();
-     } else {
-       fullyQualifiedTabName = dbName + "." + tab.getTableName();
-     }
-     RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName,
-         tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache,
-         noColsMissingStats);
-
-     // 5. Hive table scan node
-     scanRel = new HiveTableScanRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION), optTable,
-         rowType);
-
-     // 6. Register schema (RR) and position map for the new node
-     ImmutableMap<String, Integer> hiveToOptiqColMap = buildHiveToOptiqColumnMap(scanRR,
-         scanRel);
-     relToHiveRR.put(scanRel, scanRR);
-     relToHiveColNameOptiqPosMap.put(scanRel, hiveToOptiqColMap);
-   } catch (SemanticException se) {
-     throw se;
-   } catch (Exception e) {
-     throw (new RuntimeException(e));
-   }
-
-   return scanRel;
- }
-
- /**
-  * Creates a filter node on top of {@code srcRel} for a predicate without
-  * sub queries. The filter is registered with the RowResolver and the
-  * column-position map of its input.
-  *
-  * @param filterExpr
-  *          AST of the predicate
-  * @param srcRel
-  *          input of the filter
-  * @return the new filter node
-  * @throws SemanticException
-  *           if the predicate cannot be converted
-  */
- private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException {
-   RowResolver srcRR = relToHiveRR.get(srcRel);
-   ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, srcRR);
-   ImmutableMap<String, Integer> hiveColNameOptiqPosMap = this.relToHiveColNameOptiqPosMap
-       .get(srcRel);
-   RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(),
-       hiveColNameOptiqPosMap, 0, true).convert(filterCondn);
-   RelNode filterRel = new HiveFilterRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
-       srcRel, convertedFilterExpr);
-
-   // the position map used to be put twice; one registration is enough
-   relToHiveColNameOptiqPosMap.put(filterRel, hiveColNameOptiqPosMap);
-   relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
-
-   return filterRel;
- }
-
- private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
- Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException {
- /*
- * Builds the filter for a WHERE/HAVING predicate. A predicate without sub
- * queries is delegated to genFilterRelNode(searchCond, srcRel); otherwise
- * the sub query is planned, joined to srcRel, filtered and projected.
- * aliasToRel is written to by this method (sub query aliases, and the
- * "gby_sq" alias when forHavingClause is set).
- *
- * Handle Subquery predicates.
- *
- * Notes (8/22/14 hb): Why is this a copy of the code from {@link
- * #genFilterPlan} - for now we will support the same behavior as non CBO
- * route. - but plan to allow nested SubQueries(Restriction.9.m) and
- * multiple SubQuery expressions(Restriction.8.m). This requires us to
- * utilize Optiq's Decorrelation mechanics, and for Optiq to fix/flesh out
- * Null semantics(OPTIQ-373) - besides only the driving code has been
- * copied. Most of the code which is SubQueryUtils and QBSubQuery is
- * reused.
- */
- int numSrcColumns = srcRel.getRowType().getFieldCount(); // field count of the original input, passed to projectLeftOuterSide below
- List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
- if (subQueriesInOriginalTree.size() > 0) {
-
- /*
- * Restriction.9.m :: disallow nested SubQuery expressions.
- */
- if (qb.getSubQueryPredicateDef() != null) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
- }
-
- /*
- * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
- */
- if (subQueriesInOriginalTree.size() > 1) {
-
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
- }
-
- /*
- * Clone the Search AST; apply all rewrites on the clone.
- */
- ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
- List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
-
- RowResolver inputRR = relToHiveRR.get(srcRel);
- RowResolver outerQBRR = inputRR; // kept so the final node can be registered with the original input's RowResolver
- ImmutableMap<String, Integer> outerQBPosMap =
- relToHiveColNameOptiqPosMap.get(srcRel);
-
- for (int i = 0; i < subQueries.size(); i++) {
- ASTNode subQueryAST = subQueries.get(i);
- ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
-
- int sqIdx = qb.incrNumSubQueryPredicates();
- clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
-
- QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST,
- originalSubQueryAST, ctx);
-
- if (!forHavingClause) {
- qb.setWhereClauseSubQueryPredicate(subQuery);
- } else {
- qb.setHavingClauseSubQueryPredicate(subQuery);
- }
- String havingInputAlias = null;
-
- if (forHavingClause) {
- havingInputAlias = "gby_sq" + sqIdx;
- aliasToRel.put(havingInputAlias, srcRel);
- }
-
- subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias,
- aliasToRel.keySet());
-
- QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
- qbSQ.setSubQueryDef(subQuery.getSubQuery());
- Phase1Ctx ctx_1 = initPhase1Ctx();
- doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
- getMetaData(qbSQ);
- RelNode subQueryRelNode = genLogicalPlan(qbSQ); // plan the sub query as its own query block
- aliasToRel.put(subQuery.getAlias(), subQueryRelNode);
- RowResolver sqRR = relToHiveRR.get(subQueryRelNode);
-
- /*
- * Check.5.h :: For In and Not In the SubQuery must implicitly or
- * explicitly only contain one select item.
- */
- if (subQuery.getOperator().getType() != SubQueryType.EXISTS
- && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
- && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
- throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST,
- "SubQuery can contain only 1 item in Select List."));
- }
-
- /*
- * If this is a Not In SubQuery Predicate then Join in the Null Check
- * SubQuery. See QBSubQuery.NotInCheck for details on why and how this
- * is constructed.
- */
- if (subQuery.getNotInCheck() != null) {
- QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
- notInCheck.setSQRR(sqRR);
- QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
- qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery());
- ctx_1 = initPhase1Ctx();
- doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1);
- getMetaData(qbSQ_nic);
- RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic);
- aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode);
- srcRel = genJoinRelNode(srcRel, subQueryNICRelNode,
- // set explicitly to inner until we figure out SemiJoin use
- // notInCheck.getJoinType(),
- JoinType.INNER, notInCheck.getJoinConditionAST());
- inputRR = relToHiveRR.get(srcRel);
- if (forHavingClause) {
- aliasToRel.put(havingInputAlias, srcRel); // re-point the having alias at the joined input
- }
- }
-
- /*
- * Gen Join between outer Operator and SQ op
- */
- subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
- srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(),
- subQuery.getJoinConditionAST());
- searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
-
- srcRel = genFilterRelNode(searchCond, srcRel);
-
- /*
- * For Not Exists and Not In, add a projection on top of the Left
- * Outer Join.
- *
- * NOTE(review): the condition below is always true (a type cannot be
- * equal to both NOT_EXISTS and NOT_IN), so the projection is added for
- * every sub query type - confirm whether that is intended before
- * changing it.
- */
- if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
- || subQuery.getOperator().getType() != SubQueryType.NOT_IN) {
- srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
- }
- }
- relToHiveRR.put(srcRel, outerQBRR); // final node is registered under the original input's RowResolver and position map
- relToHiveColNameOptiqPosMap.put(srcRel, outerQBPosMap);
- return srcRel;
- }
-
- return genFilterRelNode(searchCond, srcRel);
- }
-
- /**
-  * Puts a project node on top of {@code srcRel} and registers it with a
-  * RowResolver filled through {@code RowResolver.add(..., 0, numColumns)}
-  * from the RowResolver of {@code srcRel}.
-  * <p>
-  * The projection itself references every field of {@code srcRel}, keeping
-  * the field names of the input row type.
-  *
-  * @param srcRel
-  *          input of the projection
-  * @param numColumns
-  *          column count handed to {@code RowResolver.add}
-  * @return the new project node
-  * @throws SemanticException
-  *           if the RowResolver or the column map cannot be built
-  */
- private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
-   RowResolver srcRR = relToHiveRR.get(srcRel);
-   RowResolver projectedRR = new RowResolver();
-   RowResolver.add(projectedRR, srcRR, 0, numColumns);
-
-   RelDataType srcRowType = srcRel.getRowType();
-   List<RexNode> projExprs = new ArrayList<RexNode>();
-   List<String> projNames = new ArrayList<String>();
-
-   int pos = 0;
-   while (pos < srcRowType.getFieldCount()) {
-     RelDataTypeField field = srcRowType.getFieldList().get(pos);
-     String fieldName = srcRowType.getFieldNames().get(pos);
-     projExprs.add(cluster.getRexBuilder().makeInputRef(field.getType(), pos));
-     projNames.add(fieldName);
-     pos++;
-   }
-
-   HiveRel projectRel = HiveProjectRel.create(srcRel, projExprs, projNames);
-
-   this.relToHiveColNameOptiqPosMap.put(projectRel,
-       buildHiveToOptiqColumnMap(projectedRR, projectRel));
-   this.relToHiveRR.put(projectRel, projectedRR);
-   return projectRel;
- }
-
- /**
-  * Plans the filter for the first where expression recorded in the parse info
-  * of {@code qb}.
-  *
-  * @param qb
-  *          query block whose where expressions are inspected
-  * @param srcRel
-  *          input of the filter
-  * @param aliasToRel
-  *          alias to RelNode map, handed on to {@code genFilterRelNode}
-  * @param forHavingClause
-  *          handed on to {@code genFilterRelNode}
-  * @return the filter node, or {@code null} when there is no where expression
-  * @throws SemanticException
-  *           if the filter cannot be planned
-  */
- private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel,
-     boolean forHavingClause) throws SemanticException {
-   Iterator<ASTNode> whereExprs = getQBParseInfo(qb).getDestToWhereExpr().values().iterator();
-   if (!whereExprs.hasNext()) {
-     return null;
-   }
-
-   ASTNode searchCond = (ASTNode) whereExprs.next().getChild(0);
-   return genFilterRelNode(qb, searchCond, srcRel, aliasToRel, forHavingClause);
- }
-
- /**
-  * Read-only holder for what is known about one GenericUDAF call: its
-  * parameter expressions, return type, function name and distinct flag.
-  */
- private class AggInfo {
-   private final boolean m_distinct;
-   private final String m_udfName;
-   private final TypeInfo m_returnType;
-   private final List<ExprNodeDesc> m_aggParams;
-
-   private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName,
-       boolean isDistinct) {
-     this.m_distinct = isDistinct;
-     this.m_udfName = udfName;
-     this.m_returnType = returnType;
-     this.m_aggParams = aggParams;
-   }
- }
-
- /**
-  * Translates one Hive aggregate into an Optiq {@code AggregateCall}.
-  * <p>
-  * Each aggregate parameter is converted to a RexNode; when its string form
-  * is not yet present in {@code rexNodeToPosMap} it is appended to
-  * {@code gbChildProjLst} and recorded in the map.
-  *
-  * @param agg
-  *          aggregate to convert
-  * @param input
-  *          not used by this method
-  * @param gbChildProjLst
-  *          projection list below the aggregate; may be appended to
-  * @param converter
-  *          converter used for the aggregate parameters
-  * @param rexNodeToPosMap
-  *          RexNode string form to projection position; may be added to
-  * @param childProjLstIndx
-  *          position given to the next parameter appended to the projection
-  * @return the aggregate call
-  * @throws SemanticException
-  *           if a parameter or type cannot be converted
-  */
- private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst,
-     RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap,
-     Integer childProjLstIndx) throws SemanticException {
-
-   // 1. Return type of the aggregate function in Optiq
-   RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType,
-       this.cluster.getTypeFactory());
-
-   // 2. Positions and Optiq types of the aggregate arguments
-   // TODO: Does HQL allows expressions as aggregate args or can it only be
-   // projections from child?
-   RelDataTypeFactory typeFactory = this.cluster.getTypeFactory();
-   List<Integer> argPositions = new ArrayList<Integer>();
-   ImmutableList.Builder<RelDataType> argTypes = new ImmutableList.Builder<RelDataType>();
-   for (ExprNodeDesc param : agg.m_aggParams) {
-     RexNode paramRex = converter.convert(param);
-     String paramKey = paramRex.toString();
-     Integer paramPos = rexNodeToPosMap.get(paramKey);
-     if (paramPos == null) {
-       // unseen expression: append it to the child projection
-       paramPos = childProjLstIndx;
-       gbChildProjLst.add(paramRex);
-       rexNodeToPosMap.put(paramKey, paramPos);
-       childProjLstIndx++;
-     }
-     argPositions.add(paramPos);
-
-     // TODO: does arg need type cast?
-     argTypes.add(TypeConverter.convert(param.getTypeInfo(), typeFactory));
-   }
-
-   // 3. Look up the Optiq aggregation by name, argument types and return type
-   final Aggregation aggregation = SqlFunctionConverter.getOptiqAggFn(agg.m_udfName,
-       argTypes.build(), aggFnRetType);
-
-   return new AggregateCall(aggregation, agg.m_distinct, argPositions, aggFnRetType, null);
- }
-
- /**
-  * Builds the aggregate node for the given group-by keys and aggregates,
-  * together with the project node that feeds it.
-  * <p>
-  * The group-by keys occupy the first positions of the child projection;
-  * aggregate parameters not already projected are appended by
-  * {@code convertGBAgg}.
-  *
-  * @param gbExprs
-  *          group-by key expressions
-  * @param aggInfoLst
-  *          aggregates to compute
-  * @param srcRel
-  *          input of the group by
-  * @return the aggregate node
-  * @throws SemanticException
-  *           if an expression cannot be converted or the aggregate node is
-  *           rejected as invalid
-  */
- private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst,
-     RelNode srcRel) throws SemanticException {
-   ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
-   RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
-       posMap, 0, false);
-
-   final List<RexNode> gbChildProjLst = Lists.newArrayList();
-   final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>();
-   final BitSet groupSet = new BitSet();
-   int gbIndx = 0;
-   for (ExprNodeDesc key : gbExprs) {
-     RexNode rnd = converter.convert(key);
-     gbChildProjLst.add(rnd);
-     groupSet.set(gbIndx);
-     rexNodeToPosMap.put(rnd.toString(), gbIndx);
-     gbIndx++;
-   }
-
-   List<AggregateCall> aggregateCalls = Lists.newArrayList();
-   for (AggInfo agg : aggInfoLst) {
-     // the current projection size is where the next new argument would go
-     aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap,
-         gbChildProjLst.size()));
-   }
-
-   if (gbChildProjLst.isEmpty()) {
-     // This will happen for count(*), in such cases we arbitarily pick
-     // first element from srcRel
-     gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0));
-   }
-   RelNode gbInputRel = HiveProjectRel.create(srcRel, gbChildProjLst, null);
-
-   HiveRel aggregateRel = null;
-   try {
-     aggregateRel = new HiveAggregateRel(cluster, cluster.traitSetOf(HiveRel.CONVENTION),
-         gbInputRel, groupSet, aggregateCalls);
-   } catch (InvalidRelException e) {
-     throw new SemanticException(e);
-   }
-
-   return aggregateRel;
- }
-
- /**
-  * Adds a (table alias, column alias) entry to the group-by RowResolver for
-  * keys that are plain column references, qualified or not. Other
-  * expressions are left alone.
-  *
-  * @param gByExpr
-  *          AST of the group-by key
-  * @param colInfo
-  *          output column of the key
-  * @param gByInputRR
-  *          RowResolver of the group-by input
-  * @param gByRR
-  *          RowResolver of the group-by output; receives the entry
-  */
- private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
-     RowResolver gByInputRR, RowResolver gByRR) {
-   int exprType = gByExpr.getType();
-
-   // qualified reference: tab.col
-   if (exprType == HiveParser.DOT
-       && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
-     String tabAlias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0)
-         .getText());
-     String colAlias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText());
-     gByRR.put(tabAlias, colAlias, colInfo);
-     return;
-   }
-
-   if (exprType != HiveParser.TOK_TABLE_OR_COL) {
-     return;
-   }
-
-   // unqualified reference: col
-   String colAlias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText());
-   String tabAlias = null;
-   /*
-    * When the group-by input knows a table alias for the column, register the
-    * entry under that alias. E.g. for: select b.x, count(*) from t1 b group
-    * by x the GBy RR needs (tab_alias=b, col_alias=x); the alias b is found
-    * in the RowResolver that precedes any GBy/ReduceSinks added for the GBY
-    * operation.
-    */
-   try {
-     ColumnInfo inputColInfo = gByInputRR.get(tabAlias, colAlias);
-     if (inputColInfo != null) {
-       tabAlias = inputColInfo.getTabAlias();
-     }
-   } catch (SemanticException se) {
-     // lookup failure is ignored; the entry is then added with a null alias
-   }
-   gByRR.put(tabAlias, colAlias, colInfo);
- }
-
- /**
-  * Registers one group-by key: appends its expression and internal column
-  * name to the given lists and adds the output column to the group-by output
-  * RowResolver, including the alternate alias mappings.
-  *
-  * @param groupByOutputRowResolver
-  *          RowResolver of the group-by output; written to
-  * @param groupByInputRowResolver
-  *          RowResolver of the group-by input
-  * @param grpbyExpr
-  *          AST of the key
-  * @param grpbyExprNDesc
-  *          expression of the key
-  * @param gbExprNDescLst
-  *          key expressions collected so far; appended to
-  * @param outputColumnNames
-  *          output column names collected so far; appended to
-  */
- private void addToGBExpr(RowResolver groupByOutputRowResolver,
-     RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc,
-     List<ExprNodeDesc> gbExprNDescLst, List<String> outputColumnNames) {
-   // TODO: Should we use grpbyExprNDesc.getTypeInfo()? what if expr is
-   // UDF
-   // the internal name is derived from the key's position in the key list
-   String internalName = getColumnInternalName(gbExprNDescLst.size());
-   outputColumnNames.add(internalName);
-   gbExprNDescLst.add(grpbyExprNDesc);
-
-   ColumnInfo outColInfo = new ColumnInfo(internalName, grpbyExprNDesc.getTypeInfo(), null,
-       false);
-   groupByOutputRowResolver.putExpression(grpbyExpr, outColInfo);
-
-   addAlternateGByKeyMappings(grpbyExpr, outColInfo, groupByInputRowResolver,
-       groupByOutputRowResolver);
- }
-
- /**
-  * Collects the parameters, distinct flag, name and return type of one
-  * aggregate function call.
-  * <p>
-  * The return type is fixed for ranking functions, otherwise taken from the
-  * UDAF evaluator; when no evaluator can be obtained the call is type checked
-  * as a GenericUDF instead.
-  *
-  * @param aggAst
-  *          AST of the function call; child 0 is the function name
-  * @param aggFnLstArgIndx
-  *          index of the last child of {@code aggAst} that is converted as a
-  *          parameter (children 1 to this index, inclusive)
-  * @param inputRR
-  *          RowResolver used to resolve the parameters
-  * @return the collected aggregate information
-  * @throws SemanticException
-  *           if a parameter or the GenericUDF fallback cannot be resolved
-  */
- private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR)
-     throws SemanticException {
-   AggInfo aInfo = null;
-
-   // 1 Convert UDAF Params to ExprNodeDesc
-   ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
-   for (int i = 1; i <= aggFnLstArgIndx; i++) {
-     ASTNode paraExpr = (ASTNode) aggAst.getChild(i);
-     ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR);
-     aggParameters.add(paraExprNode);
-   }
-
-   // 2. Is this distinct UDAF
-   boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI;
-
-   // 3. Determine type of UDAF
-   TypeInfo udafRetType = null;
-
-   // 3.1 Obtain UDAF name
-   String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
-
-   // 3.2 Rank functions type is 'int'/'double'
-   if (FunctionRegistry.isRankingFunction(aggName)) {
-     if (aggName.equalsIgnoreCase("percent_rank")) {
-       udafRetType = TypeInfoFactory.doubleTypeInfo;
-     } else {
-       udafRetType = TypeInfoFactory.intTypeInfo;
-     }
-   } else {
-     // 3.3 Try obtaining UDAF evaluators to determine the ret type
-     try {
-       boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
-
-       // 3.3.1 Get UDAF Evaluator
-       Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
-
-       GenericUDAFEvaluator genericUDAFEvaluator = null;
-       if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME)
-           || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) {
-         ArrayList<ObjectInspector> originalParameterTypeInfos =
-             getWritableObjectInspector(aggParameters);
-         genericUDAFEvaluator =
-             FunctionRegistry.getGenericWindowingEvaluator(aggName,
-                 originalParameterTypeInfos, isDistinct, isAllColumns);
-         GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
-         // the element type of the evaluator's list return type is used here
-         udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo();
-       } else {
-         genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
-             aggParameters, aggAst, isDistinct, isAllColumns);
-         assert (genericUDAFEvaluator != null);
-
-         // 3.3.2 Get UDAF Info using UDAF Evaluator
-         GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
-         udafRetType = udaf.returnType;
-       }
-     } catch (Exception e) {
-       // not fatal: the GenericUDF translation below is tried next; the
-       // exception is passed to the log so the cause is not lost
-       LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName
-           + ", trying to translate to GenericUDF", e);
-     }
-
-     // 3.4 Try GenericUDF translation
-     if (udafRetType == null) {
-       TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
-       // We allow stateful functions in the SELECT list (but nowhere else)
-       tcCtx.setAllowStatefulFunctions(true);
-       tcCtx.setAllowDistinctFunctions(false);
-       ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx);
-       udafRetType = exp.getTypeInfo();
-     }
-   }
-
-   // 4. Construct AggInfo
-   aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct);
-
-   return aInfo;
- }
-
- /**
-  * Generate the group-by plan for the first destination clause of the query
-  * block.
-  * <p>
-  * TODO: 1. Grouping Sets (roll up..)
-  *
-  * @param qb
-  *          query block to plan
-  * @param srcRel
-  *          input of the group by
-  * @return the aggregate node, or {@code null} when the clause has neither
-  *         group-by keys nor aggregations
-  * @throws SemanticException
-  *           when grouping sets, cube or rollup are used (not handled on the
-  *           CBO path) or the plan cannot be built
-  */
- private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
-   QBParseInfo parseInfo = getQBParseInfo(qb);
-
-   // 0. for GSets, Cube, Rollup, bail from Optiq path.
-   boolean usesGroupingSets = !parseInfo.getDestRollups().isEmpty()
-       || !parseInfo.getDestGroupingSets().isEmpty()
-       || !parseInfo.getDestCubes().isEmpty();
-   if (usesGroupingSets) {
-     HashMap<String, ASTNode> destToGroupBy = parseInfo.getDestToGroupBy();
-     String gbyClause;
-     if (destToGroupBy.size() != 1) {
-       gbyClause = ".";
-     } else {
-       // quote the original text of the single group-by clause
-       ASTNode gbyAST = destToGroupBy.entrySet().iterator().next().getValue();
-       String gbyText = SemanticAnalyzer.this.ctx.getTokenRewriteStream().toString(
-           gbyAST.getTokenStartIndex(), gbyAST.getTokenStopIndex());
-       gbyClause = "in '" + gbyText + "'.";
-     }
-     String msg = String.format("Encountered Grouping Set/Cube/Rollup%s"
-         + " Currently we don't support Grouping Set/Cube/Rollup"
-         + " clauses in CBO," + " turn off cbo for these queries.",
-         gbyClause);
-     LOG.debug(msg);
-     throw new OptiqSemanticException(msg);
-   }
-
-   // 1. Gather GB Expressions (AST) (GB + Aggregations)
-   // NOTE: Multi Insert is not supported
-   String destClause = parseInfo.getClauseNames().iterator().next();
-   List<ASTNode> gbKeyAsts = getGroupByForClause(parseInfo, destClause);
-   HashMap<String, ASTNode> aggAsts = parseInfo.getAggregationExprsForClause(destClause);
-   boolean hasKeys = gbKeyAsts != null && !gbKeyAsts.isEmpty();
-   boolean hasAggs = aggAsts != null && !aggAsts.isEmpty();
-   if (!hasKeys && !hasAggs) {
-     return null;
-   }
-
-   ArrayList<ExprNodeDesc> gbExprNDescLst = new ArrayList<ExprNodeDesc>();
-   ArrayList<String> outputColumnNames = new ArrayList<String>();
-
-   // 2. Input, Output Row Resolvers
-   RowResolver gbInputRR = this.relToHiveRR.get(srcRel);
-   RowResolver gbOutputRR = new RowResolver();
-   gbOutputRR.setIsExprResolver(true);
-
-   // 3. Construct GB Keys (ExprNode)
-   if (hasKeys) {
-     for (ASTNode keyAst : gbKeyAsts) {
-       Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
-           keyAst, new TypeCheckCtx(gbInputRR));
-       ExprNodeDesc keyExpr = astToExprNDescMap.get(keyAst);
-       if (keyExpr == null) {
-         throw new RuntimeException("Invalid Column Reference: " + keyAst.dump());
-       }
-
-       addToGBExpr(gbOutputRR, gbInputRR, keyAst, keyExpr, gbExprNDescLst,
-           outputColumnNames);
-     }
-   }
-
-   // 4. Construct aggregation function Info
-   ArrayList<AggInfo> aggregations = new ArrayList<AggInfo>();
-   if (hasAggs) {
-     for (ASTNode aggAst : aggAsts.values()) {
-       // 4.1 Name and flavour of the GenericUDAF
-       String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
-       boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI;
-       boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
-
-       // 4.2 Convert UDAF Params to ExprNodeDesc
-       ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
-       int childCount = aggAst.getChildCount();
-       for (int childIdx = 1; childIdx < childCount; childIdx++) {
-         aggParameters.add(genExprNodeDesc((ASTNode) aggAst.getChild(childIdx), gbInputRR));
-       }
-
-       Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
-       GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
-           aggParameters, aggAst, isDistinct, isAllColumns);
-       assert (genericUDAFEvaluator != null);
-       GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
-
-       // the output column follows the keys and the aggregates added so far
-       String field = getColumnInternalName(gbExprNDescLst.size() + aggregations.size());
-       AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct);
-       aggregations.add(aInfo);
-       outputColumnNames.add(field);
-       gbOutputRR.putExpression(aggAst, new ColumnInfo(field, aInfo.m_returnType, "", false));
-     }
-   }
-
-   RelNode gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel);
-   relToHiveColNameOptiqPosMap.put(gbRel, buildHiveToOptiqColumnMap(gbOutputRR, gbRel));
-   this.relToHiveRR.put(gbRel, gbOutputRR);
-
-   return gbRel;
- }
-
- private RelNode genOBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
- RelNode relToRet = null;
-
- QBParseInfo qbp = getQBParseInfo(qb);
- String dest = qbp.getClauseNames().iterator().next();
- ASTNode obAST = qbp.getOrderByForClause(dest);
-
- if (obAST != null) {
- // 1. OB Expr sanity test
- // in strict mode, in the presence of order by, limit must be specified
- Integer limit = qb.getParseInfo().getDestLimit(dest);
- if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")
- && limit == null) {
- throw new SemanticException(generateErrorMessage(obAST,
- ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg()));
- }
-
- // 2. Walk through OB exprs and extract field collations and additional
- // virtual columns needed
- final List<RexNode> newVCLst = new ArrayList<RexNode>();
- final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
- int fieldIndex = 0;
-
- List<Node> obASTExprLst = obAST.getChildren();
- ASTNode obASTExpr;
- List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<Pair<ASTNode, TypeInfo>>();
- RowResolver inputRR = relToHiveRR.get(srcRel);
- RowResolver outputRR = new RowResolver();
-
- RexNode rnd;
- RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(),
- relToHiveColNameOptiqPosMap.get(srcRel), 0, false);
- int srcRelRecordSz = srcRel.getRowType().getFieldCount();
-
- for (int i = 0; i < obASTExprLst.size(); i++) {
- // 2.1 Convert AST Expr to ExprNode
- obASTExpr = (ASTNode) obASTExprLst.get(i);
- Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode(
- obASTExpr, new TypeCheckCtx(inputRR));
- ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0));
- if (obExprNDesc == null)
- throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
-
- // 2.2 Convert ExprNode to RexNode
- rnd = converter.convert(obExprNDesc);
-
- // 2.3 Determine the index of ob expr in child schema
- // NOTE: Optiq can not take compound exprs in OB without it being
- // present in the child (& hence we add a child Project Rel)
- if (rnd instanceof RexInputRef) {
- fieldIndex = ((RexInputRef) rnd).getIndex();
- } else {
- fieldIndex = srcRelRecordSz + newVCLst.size();
- newVCLst.add(rnd);
- vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>((ASTNode) obASTExpr.getChild(0),
- obExprNDesc.getTypeInfo()));
- }
-
- // 2.4 Determine the Direction of order by
- org.eigenbase.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
- if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
- order = RelFieldCollation.Direction.ASCENDING;
- }
-
- // 2.5 Add to field collations
- fieldCollations.add(new RelFieldCollation(fieldIndex, order));
- }
-
- // 3. Add Child Project Rel if needed
- RelNode obInputRel = srcRel;
- if (!newVCLst.isEmpty()) {
- List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
- new Function<RelDataTypeField, RexNode>() {
- @Override
- public RexNode apply(RelDataTypeField input) {
- return new RexInputRef(input.getIndex(), input.getType());
- }
- });
-
- obInputRel = HiveProjectRel.create(srcRel, CompositeList.of(originalInputRefs, newVCLst),
- null);
- }
-
- // 4. Construct SortRel
- RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION);
- RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
- // TODO: Is it better to introduce a
- // project on top to restrict VC from showing up in sortRel type
- RelNode sortRel = new HiveSortRel(cluster, traitSet, obInputRel, canonizedCollation,
- null, null);
-
- // 5. Construct OB Parent Rel If needed
- // Construct a parent Project if OB has virtual columns(vc) otherwise
- // vc would show up in the result
- // TODO: If OB is part of sub query & Parent Query select is not of the
- // type "select */.*..." then parent project is not needed
- relToRet = sortRel;
- if (!newVCLst.isEmpty()) {
- List<RexNode> obParentRelProjs = Lists.transform(srcRel.getRowType().getFieldList(),
- new Function<RelDataTypeField, RexNode>() {
- @Override
- public RexNode apply(RelDataTypeField input) {
- return new RexInputRef(input.getIndex(), input.getType());
- }
- });
-
- relToRet = HiveProjectRel.create(sortRel, obParentRelProjs, null);
- }
-
- // 6. Construct output RR
- RowResolver.add(outputRR, inputRR, 0);
-
- // 7. Update the maps
- // NOTE: Output RR for SortRel is considered same as its input; we may
- // end up not using VC that is present in sort rel. Also note that
- // rowtype of sortrel is the type of it child; if child happens to be
- // synthetic project that we introduced then that projectrel would
- // contain the vc.
- ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR,
- relToRet);
- relToHiveRR.put(relToRet, outputRR);
- relToHiveColNameOptiqPosMap.put(relToRet, hiveColNameOptiqPosMap);
- }
-
- return relToRet;
- }
-
- private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
- HiveRel sortRel = null;
- QBParseInfo qbp = getQBParseInfo(qb);
- Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
-
- if (limit != null) {
- RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
- RelTraitSet traitSet = cluster.traitSetOf(HiveRel.CONVENTION);
- RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY);
- sortRel = new HiveSortRel(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
-
- RowResolver outputRR = new RowResolver();
- RowResolver.add(outputRR, relToHiveRR.get(srcRel), 0);
- ImmutableMap<String, Integer> hiveColNameOptiqPosMap = buildHiveToOptiqColumnMap(outputRR,
- sortRel);
- relToHiveRR.put(sortRel, outputRR);
- relToHiveColNameOptiqPosMap.put(sortRel, hiveColNameOptiqPosMap);
- }
-
- return sortRel;
- }
-
- List<RexNode> getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, RowResolver inputRR)
- throws SemanticException {
- List<RexNode> pKeys = new ArrayList<RexNode>();
- if (ps != null) {
- List<PartitionExpression> pExprs = ps.getExpressions();
- for (PartitionExpression pExpr : pExprs) {
- TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
- tcCtx.setAllowStatefulFunctions(true);
- ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx);
- pKeys.add(converter.convert(exp));
- }
- }
-
- return pKeys;
- }
-
- List<RexFieldCollation> getOrderKeys(OrderSpec os, RexNodeConverter converter,
- RowResolver inputRR) throws SemanticException {
- List<RexFieldCollation> oKeys = new ArrayList<RexFieldCollation>();
- if (os != null) {
- List<OrderExpression> oExprs = os.getExpressions();
- for (OrderExpression oExpr : oExprs) {
- TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
- tcCtx.setAllowStatefulFunctions(true);
- ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx);
- RexNode ordExp = converter.convert(exp);
- Set<SqlKind> flags = new HashSet<SqlKind>();
- if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC)
- flags.add(SqlKind.DESCENDING);
- oKeys.add(new RexFieldCollation(ordExp, flags));
- }
- }
-
- return oKeys;
- }
-
- RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) {
- RexWindowBound rwb = null;
-
- if (bs != null) {
- SqlNode sn = null;
- SqlParserPos pos = new SqlParserPos(1, 1);
- SqlNode amt = bs.getAmt() == 0 ? null : SqlLiteral.createExactNumeric(
- String.valueOf(bs.getAmt()), new SqlParserPos(2, 2));
- RexNode amtLiteral = null;
- SqlCall sc = null;
- RexNode rn = null;
-
- if (amt != null)
- amtLiteral = cluster.getRexBuilder().makeLiteral(new Integer(bs.getAmt()),
- cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true);
-
- switch (bs.getDirection()) {
- case PRECEDING:
- if (amt == null) {
- rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null);
- } else {
- sc = (SqlCall) SqlWindow.createPreceding(amt, pos);
- rwb = RexWindowBound.create(sc,
- cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
- }
- break;
-
- case CURRENT:
- rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null);
- break;
-
- case FOLLOWING:
- if (amt == null) {
- rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)),
- null);
- } else {
- sc = (SqlCall) SqlWindow.createFollowing(amt, pos);
- rwb = RexWindowBound.create(sc,
- cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
- }
- break;
- }
- }
-
- return rwb;
- }
-
- int getWindowSpecIndx(ASTNode wndAST) {
- int wndASTIndx = -1;
- int wi = wndAST.getChildCount() - 1;
- if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) {
- wi = -1;
- }
-
- return wi;
- }
-
- Pair<RexNode, TypeInfo> genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, RelNode srcRel)
- throws SemanticException {
- RexNode w = null;
- TypeInfo wHiveRetType = null;
-
- if (wExpSpec instanceof WindowFunctionSpec) {
- WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec;
- ASTNode windowProjAst = wFnSpec.getExpression();
- // TODO: do we need to get to child?
- int wndSpecASTIndx = getWindowSpecIndx(windowProjAst);
- // 2. Get Hive Aggregate Info
- AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1,
- this.relToHiveRR.get(srcRel));
-
- // 3. Get Optiq Return type for Agg Fn
- wHiveRetType = hiveAggInfo.m_returnType;
- RelDataType optiqAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType,
- this.cluster.getTypeFactory());
-
- // 4. Convert Agg Fn args to Optiq
- ImmutableMap<String, Integer> posMap = this.relToHiveColNameOptiqPosMap.get(srcRel);
- RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(),
- posMap, 0, false);
- Builder<RexNode> optiqAggFnArgsBldr = ImmutableList.<RexNode> builder();
- Builder<RelDataType> optiqAggFnArgsTypeBldr = ImmutableList.<RelDataType> builder();
- RexNode rexNd = null;
- for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) {
- optiqAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i)));
- optiqAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i)
- .getTypeInfo(), this.cluster.getTypeFactory()));
- }
- ImmutableList<RexNode> optiqAggFnArgs = optiqAggFnArgsBldr.build();
- ImmutableList<RelDataType> optiqAggFnArgsType = optiqAggFnArgsTypeBldr.build();
-
- // 5. Get Optiq Agg Fn
- final SqlAggFunction optiqAggFn = SqlFunctionConverter.getOptiqAggFn(hiveAggInfo.m_udfName,
- optiqAggFnArgsType, optiqAggFnRetType);
-
- // 6. Translate Window spec
- RowResolver inputRR = relToHiveRR.get(srcRel);
- WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec();
- List<RexNode> partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR);
- List<RexFieldCollation> orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR);
- RexWindowBound upperBound = getBound(wndSpec.windowFrame.start, converter);
- RexWindowBound lowerBound = getBound(wndSpec.windowFrame.end, converter);
- boolean isRows = ((wndSpec.windowFrame.start instanceof RangeBoundarySpec) || (wndSpec.windowFrame.end instanceof RangeBoundarySpec)) ? true
- : false;
-
- w = cluster.getRexBuilder().makeOver(optiqAggFnRetType, optiqAggFn, optiqAggFnArgs,
- partitionKeys, ImmutableList.<RexFieldCollation> copyOf(orderKeys), lowerBound,
- upperBound, isRows, true, false);
- } else {
- // TODO: Convert to Semantic Exception
- throw new RuntimeException("Unsupported window Spec");
- }
-
- return new Pair(w, wHiveRetType);
- }
-
- private RelNode genSelectForWindowing(QB qb, RelNode srcRel) throws SemanticException {
- RelNode selOpForWindow = null;
- QBParseInfo qbp = getQBParseInfo(qb);
- WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
- .values().iterator().next() : null;
-
- if (wSpec != null) {
- // 1. Get valid Window Function Spec
- wSpec.validateAndMakeEffective();
- List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
-
- if (windowExpressions != null && !windowExpressions.isEmpty()) {
- RowResolver inputRR = this.relToHiveRR.get(srcRel);
- // 2. Get RexNodes for original Projections from below
- List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
- HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
-
- // 3. Construct new Row Resolver with everything from below.
- RowResolver out_rwsch = new RowResolver();
- RowResolver.add(out_rwsch, inputRR, 0);
-
- // 4. Walk through Window Expressions & Construct RexNodes for those,
- // Update out_rwsch
- for (WindowExpressionSpec wExprSpec : windowExpressions) {
- if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
- Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
- projsForWindowSelOp.add(wtp.getKey());
-
- // 6.2.2 Update Output Row Schema
- ColumnInfo oColInfo = new ColumnInfo(
- getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
- if (false) {
- out_rwsch.checkColumn(null, wExprSpec.getAlias());
- out_rwsch.put(null, wExprSpec.getAlias(), oColInfo);
- } else {
- out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
- }
- }
- }
-
- selOpForWindow = genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
- }
- }
-
- return selOpForWindow;
- }
-
- private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch,
- RelNode srcRel) throws OptiqSemanticException {
- // 1. Build Column Names
- // TODO: Should this be external names
- ArrayList<String> columnNames = new ArrayList<String>();
- for (int i = 0; i < optiqColLst.size(); i++) {
- columnNames.add(getColumnInternalName(i));
- }
-
- // 2. Prepend column names with '_o_'
- /*
- * Hive treats names that start with '_c' as internalNames; so change the
- * names so we don't run into this issue when converting back to Hive AST.
- */
- List<String> oFieldNames = Lists.transform(columnNames, new Function<String, String>() {
- @Override
- public String apply(String hName) {
- return "_o_" + hName;
- }
- });
-
- // 3 Build Optiq Rel Node for project using converted projections & col
- // names
- HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames);
-
- // 4. Keep track of colname-to-posmap && RR for new select
- this.relToHiveColNameOptiqPosMap.put(selRel, buildHiveToOptiqColumnMap(out_rwsch, selRel));
- this.relToHiveRR.put(selRel, out_rwsch);
-
- return selRel;
- }
-
- /**
- * NOTE: there can only be one select caluse since we don't handle multi
- * destination insert.
- *
- * @throws SemanticException
- */
- private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
-
- // 0. Generate a Select Node for Windowing
- RelNode selForWindow = genSelectForWindowing(qb, srcRel);
- srcRel = (selForWindow == null) ? srcRel : selForWindow;
-
- boolean subQuery;
- ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
- ArrayList<Pair<Integer, RexNode>> windowingRexNodes = new ArrayList<Pair<Integer, RexNode>>();
-
- // 1. Get Select Expression List
- QBParseInfo qbp = getQBParseInfo(qb);
- String selClauseName = qbp.getClauseNames().iterator().next();
- ASTNode selExprList = qbp.getSelForClause(selClauseName);
-
- // 2.Row resolvers for input, output
- RowResolver out_rwsch = new RowResolver();
- ASTNode trfm = null;
- Integer pos = Integer.valueOf(0);
- RowResolver inputRR = this.relToHiveRR.get(srcRel);
-
- // 3. Query Hints
- // TODO: Handle Query Hints; currently we ignore them
- boolean selectStar = false;
- int posn = 0;
- boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
- if (hintPresent) {
- String hint = SemanticAnalyzer.this.ctx.getTokenRewriteStream().
- toString(
- selExprList.getChild(0).getTokenStartIndex(),
- selExprList.getChild(0).getTokenStopIndex());
- String msg = String.format("Hint specified for %s."
- + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint);
- LOG.debug(msg);
- throw new OptiqSemanticException(msg);
- }
-
- // 4. Determine if select corresponds to a subquery
- subQuery = qb.getParseInfo().getIsSubQ();
-
- // 4. Bailout if select involves Transform
- boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
- if (isInTransform) {
- String msg = String.format("SELECT TRANSFORM is currently not supported in CBO,"
- + " turn off cbo to use TRANSFORM.");
[... 380 lines stripped ...]