You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/06/24 08:32:31 UTC
svn commit: r1605013 [3/3] - in /hive/branches/cbo: ./
common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/...
Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java?rev=1605013&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java Tue Jun 24 06:32:30 2014
@@ -0,0 +1,261 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.sql.SqlAggFunction;
+import org.eigenbase.sql.SqlFunction;
+import org.eigenbase.sql.SqlFunctionCategory;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlOperator;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.sql.type.OperandTypes;
+import org.eigenbase.sql.type.ReturnTypes;
+import org.eigenbase.sql.type.SqlReturnTypeInference;
+import org.eigenbase.sql.type.SqlTypeName;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+public class SqlFunctionConverter {
+ static final Map<String, SqlOperator> operatorMap; // immutable copy of Builder.operatorMap, keyed by the name used at registration
+ static final Map<String, SqlOperator> hiveToOptiq; // immutable copy of Builder.hiveToOptiq, keyed by the string getName() yields for a GenericUDF
+ static final Map<SqlOperator, HiveToken> optiqToHiveToken; // only contains operators that were registered with a non-null HiveToken
+
+ static {
+ Builder builder = new Builder(); // the Builder constructor performs every registration
+ operatorMap = ImmutableMap.copyOf(builder.operatorMap);
+ hiveToOptiq = ImmutableMap.copyOf(builder.hiveToOptiq);
+ optiqToHiveToken = ImmutableMap.copyOf(builder.optiqToHiveToken);
+ }
+
+ public static SqlOperator getOptiqOperator(GenericUDF hiveUDF) {
+ return hiveToOptiq.get(getName(hiveUDF));
+ }
+
+ /**
+  * Creates the Hive AST node for an Optiq operator and attaches the operand nodes to it.
+  * An operator with a registered {@link HiveToken} becomes a node of that token's type and
+  * text. Any other operator becomes a TOK_FUNCTION node whose first child is an Identifier
+  * node carrying the operator name.
+  *
+  * @param op the Optiq operator to render
+  * @param children the already built operand nodes, appended in list order
+  * @return the new node with all operands attached
+  */
+ public static ASTNode buildAST(SqlOperator op, List<ASTNode> children) {
+   HiveToken registered = optiqToHiveToken.get(op);
+   ASTNode root;
+   if (registered == null) {
+     root = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
+     ASTNode fnName = (ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, op.getName());
+     root.addChild(fnName);
+   } else {
+     root = (ASTNode) ParseDriver.adaptor.create(registered.type, registered.text);
+   }
+
+   for (ASTNode operand : children) {
+     ParseDriver.adaptor.addChild(root, operand);
+   }
+   return root;
+ }
+
+ /**
+  * Builds the AST for a flattened associative expression ('and', 'or'). A flattened
+  * expression has the form or[x, y, z]; starting at operand index {@code i} it is expanded
+  * back into the nested binary form "or[x, or[y, z]]".
+  */
+ public static ASTNode buildAST(SqlOperator op, List<ASTNode> children, int i) {
+   // The last operand is returned as is; it needs no operator node around it.
+   if (i + 1 >= children.size()) {
+     return children.get(i);
+   }
+
+   HiveToken opToken = optiqToHiveToken.get(op);
+   ASTNode nested = (ASTNode) ParseDriver.adaptor.create(opToken.type, opToken.text);
+   ParseDriver.adaptor.addChild(nested, children.get(i));
+   // The right-hand child is the same operator applied to the remaining operands.
+   ParseDriver.adaptor.addChild(nested, buildAST(op, children, i + 1));
+   return nested;
+ }
+
+ /**
+  * Returns the string under which a Hive UDF is stored in {@code hiveToOptiq}: the UDF
+  * name reported by a {@link GenericUDFBridge}, or the implementation class name for
+  * every other {@link GenericUDF}.
+  */
+ private static String getName(GenericUDF hiveUDF) {
+   return (hiveUDF instanceof GenericUDFBridge)
+       ? ((GenericUDFBridge) hiveUDF).getUdfName()
+       : hiveUDF.getClass().getName();
+ }
+
+ /**
+  * Collects the Hive-to-Optiq function registrations in three mutable maps. All
+  * registrations happen in the constructor. Registration order matters: a later
+  * registration that produces the same map key replaces the earlier entry.
+  */
+ private static class Builder {
+   final Map<String, SqlOperator> operatorMap = Maps.newHashMap();
+   final Map<String, SqlOperator> hiveToOptiq = Maps.newHashMap();
+   final Map<SqlOperator, HiveToken> optiqToHiveToken = Maps.newHashMap();
+
+   Builder() {
+     registerFunction("concat", SqlStdOperatorTable.CONCAT, null);
+     registerFunction("substr", SqlStdOperatorTable.SUBSTRING, null);
+     registerFunction("substring", SqlStdOperatorTable.SUBSTRING, null);
+     stringFunction("space");
+     stringFunction("repeat");
+     numericFunction("ascii");
+
+     numericFunction("size");
+
+     numericFunction("round");
+     registerFunction("floor", SqlStdOperatorTable.FLOOR, null);
+     registerFunction("sqrt", SqlStdOperatorTable.SQRT, null);
+     registerFunction("ceil", SqlStdOperatorTable.CEIL, null);
+     registerFunction("ceiling", SqlStdOperatorTable.CEIL, null);
+     numericFunction("rand");
+     // NOTE(review): "abs" is only added to operatorMap and never reaches hiveToOptiq,
+     // unlike every other entry -- confirm this is intentional.
+     operatorMap.put("abs", SqlStdOperatorTable.ABS);
+     numericFunction("pmod");
+
+     numericFunction("ln");
+     numericFunction("log2");
+     numericFunction("sin");
+     numericFunction("asin");
+     numericFunction("cos");
+     numericFunction("acos");
+     registerFunction("log10", SqlStdOperatorTable.LOG10, null);
+     numericFunction("log");
+     numericFunction("exp");
+     numericFunction("power");
+     numericFunction("pow");
+     numericFunction("sign");
+     numericFunction("pi");
+     numericFunction("degrees");
+     numericFunction("atan");
+     numericFunction("tan");
+     numericFunction("e");
+
+     registerFunction("upper", SqlStdOperatorTable.UPPER, null);
+     registerFunction("lower", SqlStdOperatorTable.LOWER, null);
+     registerFunction("ucase", SqlStdOperatorTable.UPPER, null);
+     registerFunction("lcase", SqlStdOperatorTable.LOWER, null);
+     registerFunction("trim", SqlStdOperatorTable.TRIM, null);
+     stringFunction("ltrim");
+     stringFunction("rtrim");
+     numericFunction("length");
+
+     stringFunction("like");
+     stringFunction("rlike");
+     stringFunction("regexp");
+     stringFunction("regexp_replace");
+
+     stringFunction("regexp_extract");
+     stringFunction("parse_url");
+
+     numericFunction("day");
+     numericFunction("dayofmonth");
+     numericFunction("month");
+     numericFunction("year");
+     numericFunction("hour");
+     numericFunction("minute");
+     numericFunction("second");
+
+     // Each arithmetic operator is paired with its own parser token type, so the
+     // generated AST node matches what the Hive parser produces for that operator.
+     registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+"));
+     registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-"));
+     registerFunction("*", SqlStdOperatorTable.MULTIPLY, hToken(HiveParser.STAR, "*"));
+     registerFunction("/", SqlStdOperatorTable.DIVIDE, hToken(HiveParser.DIVIDE, "/"));
+     registerFunction("%", SqlStdOperatorTable.MOD, hToken(HiveParser.MOD, "%"));
+     numericFunction("div");
+
+     numericFunction("isnull");
+     numericFunction("isnotnull");
+
+     numericFunction("if");
+     numericFunction("in");
+     registerFunction("and", SqlStdOperatorTable.AND, hToken(HiveParser.KW_AND, "and"));
+     registerFunction("or", SqlStdOperatorTable.OR, hToken(HiveParser.KW_OR, "or"));
+     registerFunction("=", SqlStdOperatorTable.EQUALS, hToken(HiveParser.EQUAL, "="));
+     // "==" is deliberately left unregistered here.
+     numericFunction("<=>");
+     numericFunction("!=");
+
+     numericFunction("<>");
+     registerFunction("<", SqlStdOperatorTable.LESS_THAN, hToken(HiveParser.LESSTHAN, "<"));
+     registerFunction("<=", SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
+         hToken(HiveParser.LESSTHANOREQUALTO, "<="));
+     registerFunction(">", SqlStdOperatorTable.GREATER_THAN, hToken(HiveParser.GREATERTHAN, ">"));
+     registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
+         hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
+     // NOTE(review): "not" is registered before "!"; if both resolve to the same UDF
+     // key, the "!" entry below is the one that survives in hiveToOptiq -- confirm.
+     numericFunction("not");
+     registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
+     numericFunction("between");
+
+     registerFunction("case", SqlStdOperatorTable.CASE, null);
+     numericFunction("when");
+
+     // implicit convert methods
+     numericFunction(serdeConstants.BOOLEAN_TYPE_NAME);
+     numericFunction(serdeConstants.TINYINT_TYPE_NAME);
+     numericFunction(serdeConstants.SMALLINT_TYPE_NAME);
+     numericFunction(serdeConstants.INT_TYPE_NAME);
+     numericFunction(serdeConstants.BIGINT_TYPE_NAME);
+     numericFunction(serdeConstants.FLOAT_TYPE_NAME);
+     numericFunction(serdeConstants.DOUBLE_TYPE_NAME);
+     stringFunction(serdeConstants.STRING_TYPE_NAME);
+   }
+
+   /** Registers {@code name} as a STRING-category function with an explicit VARCHAR return type. */
+   private void stringFunction(String name) {
+     registerFunction(name, SqlFunctionCategory.STRING, ReturnTypes.explicit(SqlTypeName.VARCHAR));
+   }
+
+   /** Registers {@code name} as a NUMERIC-category function with an explicit DECIMAL return type. */
+   private void numericFunction(String name) {
+     registerFunction(name, SqlFunctionCategory.NUMERIC, ReturnTypes.explicit(SqlTypeName.DECIMAL));
+   }
+
+   /**
+    * Creates a new {@link SqlFunction} named after the upper-cased Hive name and registers
+    * it without a Hive token.
+    */
+   private void registerFunction(String name, SqlFunctionCategory cat, SqlReturnTypeInference rti) {
+     // Locale.ROOT keeps the operator name stable regardless of the JVM default locale
+     // (for example "if" must not become a dotted capital I under a Turkish locale).
+     SqlOperator optiqFn = new SqlFunction(name.toUpperCase(java.util.Locale.ROOT),
+         SqlKind.OTHER_FUNCTION, rti, null, null, cat);
+     registerFunction(name, optiqFn, null);
+   }
+
+   /**
+    * Records {@code optiqFn} under the Hive function name and under the lookup key of the
+    * function's GenericUDF; when {@code hiveToken} is non-null it is also recorded as the
+    * token used to render the operator back into a Hive AST.
+    *
+    * @throws IllegalStateException if {@code name} is unknown to the Hive FunctionRegistry
+    */
+   private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {
+     FunctionInfo hFn = FunctionRegistry.getFunctionInfo(name);
+     if (hFn == null) {
+       // Fail with the offending name instead of an anonymous NullPointerException
+       // raised from the enclosing class's static initializer.
+       throw new IllegalStateException("Hive function not found in FunctionRegistry: " + name);
+     }
+     operatorMap.put(name, optiqFn);
+
+     String hFnName = getName(hFn.getGenericUDF());
+     hiveToOptiq.put(hFnName, optiqFn);
+     if (hiveToken != null) {
+       optiqToHiveToken.put(optiqFn, hiveToken);
+     }
+   }
+ }
+
+ private static HiveToken hToken(int type, String text) { // shorthand factory used by the Builder registrations
+ return new HiveToken(type, text);
+ }
+
+ /**
+  * Token type and token text used to create the Hive AST node for an operator. Instances
+  * are shared through the static {@code optiqToHiveToken} map, so both fields are final.
+  */
+ static class HiveToken {
+   final int type;
+   final String text;
+
+   HiveToken(int type, String text) {
+     this.type = type;
+     this.text = text;
+   }
+ }
+
+ static SqlAggFunction hiveAggFunction(String name) { // creates a new HiveAggFunction on every call; nothing is cached
+ return new HiveAggFunction(name);
+ }
+
+ static class HiveAggFunction extends SqlAggFunction { // aggregate function identified only by the name it is constructed with
+
+ public HiveAggFunction(String name) {
+ super(name, SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, null,
+ OperandTypes.ANY, SqlFunctionCategory.NUMERIC); // passes ReturnTypes.BIGINT and OperandTypes.ANY for every aggregate name
+ }
+
+ public List<RelDataType> getParameterTypes(RelDataTypeFactory typeFactory) {
+ return ImmutableList.of(typeFactory.createSqlType(SqlTypeName.ANY)); // a single parameter of SQL type ANY
+ }
+
+ public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
+ return typeFactory.createSqlType(SqlTypeName.BIGINT); // always BIGINT, independent of the function name
+ }
+
+ }
+}
Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java?rev=1605013&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java (added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java Tue Jun 24 06:32:30 2014
@@ -0,0 +1,152 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.sql.type.SqlTypeName;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+
+public class TypeConverter {
+
+ /**
+  * Builds the Optiq struct type describing the columns of a row resolver.
+  *
+  * @param cluster supplies the RexBuilder whose type factory creates the result
+  * @param rr row resolver whose row schema is converted
+  * @param neededCols internal names of the columns to keep, or {@code null} to keep all
+  * @return a struct type with one field per kept column, named by the column's internal
+  *         name, in row-schema order
+  */
+ public static RelDataType getType(RelOptCluster cluster, RowResolver rr, List<String> neededCols) {
+   RelDataTypeFactory typeFactory = cluster.getRexBuilder().getTypeFactory();
+   List<RelDataType> colTypes = new LinkedList<RelDataType>();
+   List<String> colNames = new LinkedList<String>();
+
+   for (ColumnInfo col : rr.getRowSchema().getSignature()) {
+     String internalName = col.getInternalName();
+     if (neededCols != null && !neededCols.contains(internalName)) {
+       continue;
+     }
+     colTypes.add(convert(col.getType(), typeFactory));
+     colNames.add(internalName);
+   }
+   return typeFactory.createStructType(colTypes, colNames);
+ }
+
+ /**
+  * Converts a Hive type to an Optiq type by dispatching on the type's category to the
+  * matching overload.
+  *
+  * @return the converted type, or {@code null} for a category that has no case here
+  */
+ public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory) {
+   switch (type.getCategory()) {
+   case PRIMITIVE:
+     return convert((PrimitiveTypeInfo) type, dtFactory);
+   case LIST:
+     return convert((ListTypeInfo) type, dtFactory);
+   case MAP:
+     return convert((MapTypeInfo) type, dtFactory);
+   case STRUCT:
+     return convert((StructTypeInfo) type, dtFactory);
+   case UNION:
+     return convert((UnionTypeInfo) type, dtFactory);
+   default:
+     return null;
+   }
+ }
+
+ /**
+  * Converts a Hive primitive type to the corresponding Optiq SQL type.
+  *
+  * @param type the Hive primitive type
+  * @param dtFactory factory used to create the Optiq type
+  * @return the converted type, or {@code null} for a primitive category that has no case
+  *         here
+  */
+ public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
+   RelDataType convertedType = null;
+
+   switch (type.getPrimitiveCategory()) {
+   case VOID:
+     // @todo: followup on VOID type in hive
+     convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
+     break;
+   case BOOLEAN:
+     convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
+     break;
+   case BYTE:
+     convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
+     break;
+   case SHORT:
+     convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
+     break;
+   case INT:
+     convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
+     break;
+   case LONG:
+     convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
+     break;
+   case FLOAT:
+     convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
+     break;
+   case DOUBLE:
+     convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
+     break;
+   case STRING:
+     // A Hive STRING has no declared length, so request the largest VARCHAR precision
+     // rather than a one-character VARCHAR.
+     convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE);
+     break;
+   case DATE:
+     convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
+     break;
+   case TIMESTAMP:
+     convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP);
+     break;
+   case BINARY:
+     convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
+     break;
+   case DECIMAL:
+     // NOTE(review): no precision or scale is passed here -- confirm whether the Hive
+     // decimal's precision and scale should be carried over.
+     convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL);
+     break;
+   case VARCHAR:
+     convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR,
+         ((BaseCharTypeInfo) type).getLength());
+     break;
+   case CHAR:
+     convertedType = dtFactory.createSqlType(SqlTypeName.CHAR,
+         ((BaseCharTypeInfo) type).getLength());
+     break;
+   case UNKNOWN:
+     convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
+     break;
+   }
+
+   return convertedType;
+ }
+
+ /**
+  * Converts a Hive list type to an Optiq array type over the converted element type,
+  * passing -1 as the array size argument.
+  */
+ public static RelDataType convert(ListTypeInfo lstType, RelDataTypeFactory dtFactory) {
+   return dtFactory.createArrayType(convert(lstType.getListElementTypeInfo(), dtFactory), -1);
+ }
+
+ /**
+  * Converts a Hive map type to an Optiq map type; the key type is converted first, then
+  * the value type.
+  */
+ public static RelDataType convert(MapTypeInfo mapType, RelDataTypeFactory dtFactory) {
+   return dtFactory.createMapType(
+       convert(mapType.getMapKeyTypeInfo(), dtFactory),
+       convert(mapType.getMapValueTypeInfo(), dtFactory));
+ }
+
+ public static RelDataType convert(StructTypeInfo structType, final RelDataTypeFactory dtFactory) { // dtFactory is final so the anonymous Function below can capture it
+ List<RelDataType> fTypes = Lists.transform(structType.getAllStructFieldTypeInfos(), // NOTE(review): Guava documents Lists.transform as a lazy view, so apply() may run on every access -- confirm that is acceptable
+ new Function<TypeInfo, RelDataType>() {
+ public RelDataType apply(TypeInfo tI) {
+ return convert(tI, dtFactory); // each field type goes through the category-dispatching convert overload
+ }
+ });
+ return dtFactory.createStructType(fTypes, structType.getAllStructFieldNames()); // field types and field names are passed as two separate lists
+ }
+
+ public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) {
+ // @todo: decide what to do about union types; until then every union type is rejected.
+ throw new UnsupportedOperationException();
+ }
+
+}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Tue Jun 24 06:32:30 2014
@@ -101,7 +101,7 @@ public class ColumnStatsSemanticAnalyzer
}
public ColumnStatsSemanticAnalyzer(HiveConf conf) throws SemanticException {
- super(conf);
+ super(conf, false);
}
private boolean shouldRewrite(ASTNode tree) {
@@ -459,7 +459,7 @@ public class ColumnStatsSemanticAnalyzer
}
public ColumnStatsSemanticAnalyzer(HiveConf conf, ASTNode tree) throws SemanticException {
- super(conf);
+ super(conf, false);
// check if it is no scan. grammar prevents coexit noscan/columns
super.processNoScanCommand(tree);
// check if it is partial scan. grammar prevents coexit partialscan/columns
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java Tue Jun 24 06:32:30 2014
@@ -131,7 +131,7 @@ public class ParseDriver {
* so that the graph walking algorithms and the rules framework defined in
* ql.lib can be used with the AST Nodes.
*/
- static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
+ public static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
/**
* Creates an ASTNode for the given token. The ASTNode is a wrapper around
* antlr's CommonTree class that implements the Node interface.
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Jun 24 06:32:30 2014
@@ -100,7 +100,10 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.CostBasedOptimizer;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.PreCBOOptimizer;
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.CBOTableStatsValidator;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
@@ -259,6 +262,9 @@ public class SemanticAnalyzer extends Ba
//flag for partial scan during analyze ... compute statistics
protected boolean partialscan = false;
+ private volatile boolean runCBO = true;
+ private volatile boolean disableJoinMerge = false;
+
/*
* Capture the CTE definitions in a Query.
*/
@@ -273,6 +279,11 @@ public class SemanticAnalyzer extends Ba
int nextNum;
}
+ protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException {
+ this(conf);
+ this.runCBO = runCBO;
+ }
+
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
@@ -323,7 +334,28 @@ public class SemanticAnalyzer extends Ba
opParseCtx.clear();
groupOpToInputTables.clear();
prunedPartitions.clear();
+ disableJoinMerge = false;
aliasToCTEs.clear();
+ topToTable.clear();
+ opToPartPruner.clear();
+ opToPartList.clear();
+ opToPartToSkewedPruner.clear();
+ opToSamplePruner.clear();
+ nameToSplitSample.clear();
+ fsopToTable.clear();
+ resultSchema = null;
+ createVwDesc = null;
+ viewsExpanded = null;
+ viewSelect = null;
+ ctesExpanded = null;
+ globalLimitCtx.disableOpt();
+ viewAliasToInput.clear();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
+ topToTableProps.clear();
+ listMapJoinOpsNoReducer.clear();
+ unparseTranslator.clear();
+ queryProperties.clear();
+ outputs.clear();
}
public void initParseCtx(ParseContext pctx) {
@@ -972,7 +1004,6 @@ public class SemanticAnalyzer extends Ba
frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
- queryProperties.setHasJoin(true);
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
@@ -1189,6 +1220,10 @@ public class SemanticAnalyzer extends Ba
}
}
+ public Table getTable(TableScanOperator ts) {
+ return topToTable.get(ts);
+ }
+
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, null);
}
@@ -6737,6 +6772,7 @@ public class SemanticAnalyzer extends Ba
}
desc.setNullSafes(nullsafes);
}
+ queryProperties.incrementJoinCount(joinOp.getConf().getNoOuterJoin());
return putOpInsertMap(joinOp, outputRR);
}
@@ -9146,7 +9182,9 @@ public class SemanticAnalyzer extends Ba
aliasToOpInfo );
}
}
- mergeJoinTree(qb);
+
+ if (!disableJoinMerge)
+ mergeJoinTree(qb);
}
// if any filters are present in the join tree, push them on top of the
@@ -9411,6 +9449,19 @@ public class SemanticAnalyzer extends Ba
getMetaData(qb);
LOG.info("Completed getting MetaData in Semantic Analysis");
+ if (runCBO) {
+ boolean tokenTypeIsQuery = ast.getToken().getType() == HiveParser.TOK_QUERY
+ || ast.getToken().getType() == HiveParser.TOK_EXPLAIN;
+ if (!tokenTypeIsQuery || createVwDesc != null
+ || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
+ runCBO = false;
+ }
+
+ if (runCBO) {
+ disableJoinMerge = true;
+ }
+ }
+
// Save the result schema derived from the sink operator produced
// by genPlan. This has the correct column names, which clients
// such as JDBC would prefer instead of the c0, c1 we'll end
@@ -9423,6 +9474,96 @@ public class SemanticAnalyzer extends Ba
resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(),
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
+ if (runCBO) {
+ /*
+ * For CBO: 1. Check if CBO can handle op tree. 2. Run PreCBOOptimizer on
+ * Plan. This applies: Partition Pruning, Predicate Pushdown, Column
+ * Pruning and Stats Annotation transformations on the generated plan. 3.
+ * Validate that all TS has valid stats 4. Hand the Plan to CBO, which
+ * searches the Plan space and returns the best Plan as an AST 5. We then
+ * run the Analysis Pipeline on the new AST: Phase 1, Get Metadata, Gen
+ * Plan. a. During Plan Generation, we disable Join Merging, because we
+ * don't want the Join order to be changed. Error Handling: On Failure -
+ * we restart the Analysis from the beginning on the original AST, with
+ * runCBO set to false.
+ */
+ boolean reAnalyzeAST = false;
+
+ try {
+ // 1. Can CBO handle OP tree
+ if (CostBasedOptimizer.canHandleOpTree(sinkOp, conf, queryProperties)) {
+ ASTNode newAST = null;
+
+ // 2. Set up parse ctx for CBO
+ ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, opToPartList,
+ topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable,
+ topToTableProps, fsopToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap,
+ destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
+ opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
+ opToPartToSkewedPruner, viewAliasToInput,
+ reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties);
+
+ // 3. Run Pre CBO optimizer
+ PreCBOOptimizer preCBOOptm = new PreCBOOptimizer();
+ preCBOOptm.setPctx(pCtx);
+ preCBOOptm.initialize(conf);
+ pCtx = preCBOOptm.optimize();
+
+ // 4. Validate Table Stats
+ CBOTableStatsValidator tableStatsValidator = new CBOTableStatsValidator();
+ if (tableStatsValidator.validStats(sinkOp, pCtx)) {
+
+ // 5. Optimize the plan with CBO & generate optimized AST
+ newAST = CostBasedOptimizer.optimize(sinkOp, this, pCtx, resultSchema);
+ if (LOG.isDebugEnabled()) {
+ String newAstExpanded = newAST.dump();
+ LOG.debug("CBO rewritten query: \n" + newAstExpanded);
+ }
+
+ // 6. Regen OP plan from optimized AST
+ init();
+ ctx_1 = initPhase1Ctx();
+ if (!doPhase1(newAST, qb, ctx_1)) {
+ throw new RuntimeException("Couldn't do phase1 on CBO optimized query plan");
+ }
+ getMetaData(qb);
+
+ disableJoinMerge = true;
+ sinkOp = genPlan(qb);
+
+ /*
+ * Use non CBO Result Set Schema so as to preserve user specified
+ * names. Hive seems to have bugs with OB/LIMIT in sub queries.
+ * // 7. Reset result set schema resultSchema =
+ * convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp)
+ * .getRowResolver(), true);
+ */
+ } else {
+ reAnalyzeAST = true;
+ LOG.warn("Skipping CBO. Incomplete column stats for Tables: "
+ + tableStatsValidator.getIncompleteStatsTabNames());
+ }
+ } else {
+ // Need to regen OP tree since join merge was disabled.
+ // TODO: can we just regen OP tree instead of reanalyzing AST.
+ if (queryProperties.getJoinCount() > 1)
+ reAnalyzeAST = true;
+ LOG.info("Skipping CBO as CBO can not handle OP tree.");
+ }
+ } catch (Exception e) {
+ reAnalyzeAST = true;
+ LOG.warn("CBO failed, skipping CBO. ", e);
+ } finally {
+ runCBO = false;
+ disableJoinMerge = false;
+ if (reAnalyzeAST) {
+ init();
+ analyzeInternal(ast);
+ return;
+ }
+ }
+ }
+
ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner,
opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext,
topToTable, topToTableProps, fsopToTable,
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java Tue Jun 24 06:32:30 2014
@@ -262,4 +262,10 @@ class UnparseTranslator {
ASTNode targetNode;
ASTNode sourceNode;
}
+
+ public void clear() {
+ translations.clear();
+ copyTranslations.clear();
+ enabled = false;
+ }
}