Posted to commits@hive.apache.org by ha...@apache.org on 2014/06/24 08:32:31 UTC

svn commit: r1605013 [3/3] - in /hive/branches/cbo: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/...

Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java?rev=1605013&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java Tue Jun 24 06:32:30 2014
@@ -0,0 +1,261 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.sql.SqlAggFunction;
+import org.eigenbase.sql.SqlFunction;
+import org.eigenbase.sql.SqlFunctionCategory;
+import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlOperator;
+import org.eigenbase.sql.fun.SqlStdOperatorTable;
+import org.eigenbase.sql.type.OperandTypes;
+import org.eigenbase.sql.type.ReturnTypes;
+import org.eigenbase.sql.type.SqlReturnTypeInference;
+import org.eigenbase.sql.type.SqlTypeName;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+public class SqlFunctionConverter {
+  static final Map<String, SqlOperator>    operatorMap;
+  static final Map<String, SqlOperator>    hiveToOptiq;
+  static final Map<SqlOperator, HiveToken> optiqToHiveToken;
+
+  static {
+    Builder builder = new Builder();
+    operatorMap = ImmutableMap.copyOf(builder.operatorMap);
+    hiveToOptiq = ImmutableMap.copyOf(builder.hiveToOptiq);
+    optiqToHiveToken = ImmutableMap.copyOf(builder.optiqToHiveToken);
+  }
+
+  public static SqlOperator getOptiqOperator(GenericUDF hiveUDF) {
+    return hiveToOptiq.get(getName(hiveUDF));
+  }
+
+  public static ASTNode buildAST(SqlOperator op, List<ASTNode> children) {
+    HiveToken hToken = optiqToHiveToken.get(op);
+    ASTNode node;
+    if (hToken != null) {
+      node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
+    } else {
+      node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
+      node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, op.getName()));
+    }
+
+    for (ASTNode c : children) {
+      ParseDriver.adaptor.addChild(node, c);
+    }
+    return node;
+  }
+
+  /**
+   * Build an AST for flattened associative expressions ('and', 'or'). A
+   * flattened expression has the form or[x, y, z], which is originally
+   * represented as "or[x, or[y, z]]".
+   */
+  public static ASTNode buildAST(SqlOperator op, List<ASTNode> children, int i) {
+    if (i + 1 < children.size()) {
+      HiveToken hToken = optiqToHiveToken.get(op);
+      ASTNode curNode = ((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
+      ParseDriver.adaptor.addChild(curNode, children.get(i));
+      ParseDriver.adaptor.addChild(curNode, buildAST(op, children, i + 1));
+      return curNode;
+    } else {
+      return children.get(i);
+    }
+
+  }
+
+  private static String getName(GenericUDF hiveUDF) {
+    if (hiveUDF instanceof GenericUDFBridge) {
+      return ((GenericUDFBridge) hiveUDF).getUdfName();
+    } else {
+      return hiveUDF.getClass().getName();
+    }
+  }
+
+  private static class Builder {
+    final Map<String, SqlOperator>    operatorMap      = Maps.newHashMap();
+    final Map<String, SqlOperator>    hiveToOptiq      = Maps.newHashMap();
+    final Map<SqlOperator, HiveToken> optiqToHiveToken = Maps.newHashMap();
+
+    Builder() {
+      registerFunction("concat", SqlStdOperatorTable.CONCAT, null);
+      registerFunction("substr", SqlStdOperatorTable.SUBSTRING, null);
+      registerFunction("substring", SqlStdOperatorTable.SUBSTRING, null);
+      stringFunction("space");
+      stringFunction("repeat");
+      numericFunction("ascii");
+      stringFunction("repeat");
+
+      numericFunction("size");
+
+      numericFunction("round");
+      registerFunction("floor", SqlStdOperatorTable.FLOOR, null);
+      registerFunction("sqrt", SqlStdOperatorTable.SQRT, null);
+      registerFunction("ceil", SqlStdOperatorTable.CEIL, null);
+      registerFunction("ceiling", SqlStdOperatorTable.CEIL, null);
+      numericFunction("rand");
+      operatorMap.put("abs", SqlStdOperatorTable.ABS);
+      numericFunction("pmod");
+
+      numericFunction("ln");
+      numericFunction("log2");
+      numericFunction("sin");
+      numericFunction("asin");
+      numericFunction("cos");
+      numericFunction("acos");
+      registerFunction("log10", SqlStdOperatorTable.LOG10, null);
+      numericFunction("log");
+      numericFunction("exp");
+      numericFunction("power");
+      numericFunction("pow");
+      numericFunction("sign");
+      numericFunction("pi");
+      numericFunction("degrees");
+      numericFunction("atan");
+      numericFunction("tan");
+      numericFunction("e");
+
+      registerFunction("upper", SqlStdOperatorTable.UPPER, null);
+      registerFunction("lower", SqlStdOperatorTable.LOWER, null);
+      registerFunction("ucase", SqlStdOperatorTable.UPPER, null);
+      registerFunction("lcase", SqlStdOperatorTable.LOWER, null);
+      registerFunction("trim", SqlStdOperatorTable.TRIM, null);
+      stringFunction("ltrim");
+      stringFunction("rtrim");
+      numericFunction("length");
+
+      stringFunction("like");
+      stringFunction("rlike");
+      stringFunction("regexp");
+      stringFunction("regexp_replace");
+
+      stringFunction("regexp_extract");
+      stringFunction("parse_url");
+
+      numericFunction("day");
+      numericFunction("dayofmonth");
+      numericFunction("month");
+      numericFunction("year");
+      numericFunction("hour");
+      numericFunction("minute");
+      numericFunction("second");
+
+      registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+"));
+      registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-"));
+      registerFunction("*", SqlStdOperatorTable.MULTIPLY, hToken(HiveParser.STAR, "*"));
+      registerFunction("/", SqlStdOperatorTable.DIVIDE, hToken(HiveParser.STAR, "/"));
+      registerFunction("%", SqlStdOperatorTable.MOD, hToken(HiveParser.STAR, "%"));
+      numericFunction("div");
+
+      numericFunction("isnull");
+      numericFunction("isnotnull");
+
+      numericFunction("if");
+      numericFunction("in");
+      registerFunction("and", SqlStdOperatorTable.AND, hToken(HiveParser.KW_AND, "and"));
+      registerFunction("or", SqlStdOperatorTable.OR, hToken(HiveParser.KW_OR, "or"));
+      registerFunction("=", SqlStdOperatorTable.EQUALS, hToken(HiveParser.EQUAL, "="));
+//      numericFunction("==");
+      numericFunction("<=>");
+      numericFunction("!=");
+
+      numericFunction("<>");
+      registerFunction("<", SqlStdOperatorTable.LESS_THAN, hToken(HiveParser.LESSTHAN, "<"));
+      registerFunction("<=", SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
+          hToken(HiveParser.LESSTHANOREQUALTO, "<="));
+      registerFunction(">", SqlStdOperatorTable.GREATER_THAN, hToken(HiveParser.GREATERTHAN, ">"));
+      registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
+          hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
+      numericFunction("not");
+      registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
+      numericFunction("between");
+
+      registerFunction("case", SqlStdOperatorTable.CASE, null);
+      numericFunction("when");
+
+      // implicit convert methods
+      numericFunction(serdeConstants.BOOLEAN_TYPE_NAME);
+      numericFunction(serdeConstants.TINYINT_TYPE_NAME);
+      numericFunction(serdeConstants.SMALLINT_TYPE_NAME);
+      numericFunction(serdeConstants.INT_TYPE_NAME);
+      numericFunction(serdeConstants.BIGINT_TYPE_NAME);
+      numericFunction(serdeConstants.FLOAT_TYPE_NAME);
+      numericFunction(serdeConstants.DOUBLE_TYPE_NAME);
+      stringFunction(serdeConstants.STRING_TYPE_NAME);
+    }
+
+    private void stringFunction(String name) {
+      registerFunction(name, SqlFunctionCategory.STRING, ReturnTypes.explicit(SqlTypeName.VARCHAR));
+    }
+
+    private void numericFunction(String name) {
+      registerFunction(name, SqlFunctionCategory.NUMERIC, ReturnTypes.explicit(SqlTypeName.DECIMAL));
+    }
+
+    private void registerFunction(String name, SqlFunctionCategory cat, SqlReturnTypeInference rti) {
+      SqlOperator optiqFn = new SqlFunction(name.toUpperCase(), SqlKind.OTHER_FUNCTION, rti, null,
+          null, cat);
+      registerFunction(name, optiqFn, null);
+    }
+
+    private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {
+      FunctionInfo hFn = FunctionRegistry.getFunctionInfo(name);
+      operatorMap.put(name, optiqFn);
+
+      String hFnName = getName(hFn.getGenericUDF());
+      hiveToOptiq.put(hFnName, optiqFn);
+      if (hiveToken != null) {
+        optiqToHiveToken.put(optiqFn, hiveToken);
+      }
+    }
+  }
+
+  private static HiveToken hToken(int type, String text) {
+    return new HiveToken(type, text);
+  }
+
+  static class HiveToken {
+    int    type;
+    String text;
+
+    HiveToken(int type, String text) {
+      this.type = type;
+      this.text = text;
+    }
+  }
+
+  static SqlAggFunction hiveAggFunction(String name) {
+    return new HiveAggFunction(name);
+  }
+
+  static class HiveAggFunction extends SqlAggFunction {
+
+    public HiveAggFunction(String name) {
+      super(name, SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, null,
+          OperandTypes.ANY, SqlFunctionCategory.NUMERIC);
+    }
+
+    public List<RelDataType> getParameterTypes(RelDataTypeFactory typeFactory) {
+      return ImmutableList.of(typeFactory.createSqlType(SqlTypeName.ANY));
+    }
+
+    public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
+      return typeFactory.createSqlType(SqlTypeName.BIGINT);
+    }
+
+  }
+}
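
A note on the translator above: SqlFunctionConverter maintains three maps so
translation can run in both directions -- operatorMap (Hive function name ->
Optiq operator), hiveToOptiq (resolved GenericUDF name -> Optiq operator), and
optiqToHiveToken (Optiq operator -> Hive parser token). The recursive
buildAST(op, children, i) overload turns a flattened associative call back into
the right-nested binary tree Hive's parser produces. A minimal, self-contained
sketch of that fold on plain strings (illustrative only, not part of the
commit):

import java.util.Arrays;
import java.util.List;

public class UnflattenSketch {
  // Rebuild a right-nested binary tree from a flattened child list,
  // mirroring buildAST(op, children, i): or[x, y, z] -> or[x, or[y, z]].
  static String unflatten(String op, List<String> children, int i) {
    if (i + 1 < children.size()) {
      return op + "[" + children.get(i) + ", "
          + unflatten(op, children, i + 1) + "]";
    }
    return children.get(i); // the last child needs no operator node
  }

  public static void main(String[] args) {
    // Prints: or[x, or[y, z]]
    System.out.println(unflatten("or", Arrays.asList("x", "y", "z"), 0));
  }
}

Since 'and'/'or' are associative, the right-nesting is equivalent to any other
grouping; the real method builds ASTNode instances through ParseDriver.adaptor
instead of strings.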

Added: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java?rev=1605013&view=auto
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java (added)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/TypeConverter.java Tue Jun 24 06:32:30 2014
@@ -0,0 +1,152 @@
+package org.apache.hadoop.hive.ql.optimizer.optiq.translator;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
+import org.eigenbase.rex.RexBuilder;
+import org.eigenbase.sql.type.SqlTypeName;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+
+public class TypeConverter {
+
+  public static RelDataType getType(RelOptCluster cluster, RowResolver rr, List<String> neededCols) {
+    RexBuilder rexBuilder = cluster.getRexBuilder();
+    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+    RowSchema rs = rr.getRowSchema();
+    List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
+    List<String> fieldNames = new LinkedList<String>();
+
+    for (ColumnInfo ci : rs.getSignature()) {
+      if (neededCols == null || neededCols.contains(ci.getInternalName())) {
+        fieldTypes.add(convert(ci.getType(), dtFactory));
+        fieldNames.add(ci.getInternalName());
+      }
+    }
+    return dtFactory.createStructType(fieldTypes, fieldNames);
+  }
+
+  public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory) {
+    RelDataType convertedType = null;
+
+    switch (type.getCategory()) {
+    case PRIMITIVE:
+      convertedType = convert((PrimitiveTypeInfo) type, dtFactory);
+      break;
+    case LIST:
+      convertedType = convert((ListTypeInfo) type, dtFactory);
+      break;
+    case MAP:
+      convertedType = convert((MapTypeInfo) type, dtFactory);
+      break;
+    case STRUCT:
+      convertedType = convert((StructTypeInfo) type, dtFactory);
+      break;
+    case UNION:
+      convertedType = convert((UnionTypeInfo) type, dtFactory);
+      break;
+    }
+    return convertedType;
+  }
+
+  public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
+    RelDataType convertedType = null;
+
+    switch (type.getPrimitiveCategory()) {
+    case VOID:
+      // @todo: follow up on the VOID type in Hive
+      convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
+      break;
+    case BOOLEAN:
+      convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
+      break;
+    case BYTE:
+      convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
+      break;
+    case SHORT:
+      convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
+      break;
+    case INT:
+      convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
+      break;
+    case LONG:
+      convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
+      break;
+    case FLOAT:
+      convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
+      break;
+    case DOUBLE:
+      convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
+      break;
+    case STRING:
+      convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR, 1);
+      break;
+    case DATE:
+      convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
+      break;
+    case TIMESTAMP:
+      convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP);
+      break;
+    case BINARY:
+      convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
+      break;
+    case DECIMAL:
+      convertedType = dtFactory.createSqlType(SqlTypeName.DECIMAL);
+      break;
+    case VARCHAR:
+      convertedType = dtFactory.createSqlType(SqlTypeName.VARCHAR,
+          ((BaseCharTypeInfo) type).getLength());
+      break;
+    case CHAR:
+      convertedType = dtFactory.createSqlType(SqlTypeName.CHAR,
+          ((BaseCharTypeInfo) type).getLength());
+      break;
+    case UNKNOWN:
+      convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
+      break;
+    }
+
+    return convertedType;
+  }
+
+  public static RelDataType convert(ListTypeInfo lstType, RelDataTypeFactory dtFactory) {
+    RelDataType elemType = convert(lstType.getListElementTypeInfo(), dtFactory);
+    return dtFactory.createArrayType(elemType, -1);
+  }
+
+  public static RelDataType convert(MapTypeInfo mapType, RelDataTypeFactory dtFactory) {
+    RelDataType keyType = convert(mapType.getMapKeyTypeInfo(), dtFactory);
+    RelDataType valueType = convert(mapType.getMapValueTypeInfo(), dtFactory);
+    return dtFactory.createMapType(keyType, valueType);
+  }
+
+  public static RelDataType convert(StructTypeInfo structType, final RelDataTypeFactory dtFactory) {
+    List<RelDataType> fTypes = Lists.transform(structType.getAllStructFieldTypeInfos(),
+        new Function<TypeInfo, RelDataType>() {
+          public RelDataType apply(TypeInfo tI) {
+            return convert(tI, dtFactory);
+          }
+        });
+    return dtFactory.createStructType(fTypes, structType.getAllStructFieldNames());
+  }
+
+  public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) {
+    // @todo what do we do about unions?
+    throw new UnsupportedOperationException();
+  }
+
+}
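
TypeConverter above recurses through Hive's TypeInfo tree, dispatching on the
type category and rebuilding the equivalent Optiq RelDataType. A hedged usage
sketch; the no-arg SqlTypeFactoryImpl constructor is an assumption about the
eigenbase-era API, since in the commit the factory always comes from
cluster.getRexBuilder().getTypeFactory():

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.eigenbase.reltype.RelDataType;
import org.eigenbase.reltype.RelDataTypeFactory;
import org.eigenbase.sql.type.SqlTypeFactoryImpl;

public class TypeConverterSketch {
  public static void main(String[] args) {
    RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(); // assumed ctor

    // map<string, array<int>> exercises the MAP and LIST branches, which
    // recurse into convert(PrimitiveTypeInfo, ...) for the leaf types.
    TypeInfo nested = TypeInfoFactory.getMapTypeInfo(
        TypeInfoFactory.stringTypeInfo,
        TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo));

    RelDataType relType = TypeConverter.convert(nested, dtFactory);
    System.out.println(relType); // e.g. (VARCHAR(1), INTEGER ARRAY) MAP
  }
}

Note that a plain Hive STRING maps to VARCHAR(1) in this version; only VARCHAR
and CHAR columns carry their declared length through the conversion.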

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Tue Jun 24 06:32:30 2014
@@ -101,7 +101,7 @@ public class ColumnStatsSemanticAnalyzer
   }
 
   public ColumnStatsSemanticAnalyzer(HiveConf conf) throws SemanticException {
-    super(conf);
+    super(conf, false);
   }
 
   private boolean shouldRewrite(ASTNode tree) {
@@ -459,7 +459,7 @@ public class ColumnStatsSemanticAnalyzer
   }
 
   public ColumnStatsSemanticAnalyzer(HiveConf conf, ASTNode tree) throws SemanticException {
-    super(conf);
+    super(conf, false);
     // check if it is no scan. grammar prevents coexistence of noscan/columns
     super.processNoScanCommand(tree);
     // check if it is partial scan. grammar prevents coexistence of partialscan/columns

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java Tue Jun 24 06:32:30 2014
@@ -131,7 +131,7 @@ public class ParseDriver {
    * so that the graph walking algorithms and the rules framework defined in
    * ql.lib can be used with the AST Nodes.
    */
-  static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
+  public static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
     /**
      * Creates an ASTNode for the given token. The ASTNode is a wrapper around
      * antlr's CommonTree class that implements the Node interface.

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Jun 24 06:32:30 2014
@@ -100,7 +100,10 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.CostBasedOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.PreCBOOptimizer;
+import org.apache.hadoop.hive.ql.optimizer.optiq.stats.CBOTableStatsValidator;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
@@ -259,6 +262,9 @@ public class SemanticAnalyzer extends Ba
   //flag for partial scan during analyze ... compute statistics
   protected boolean partialscan = false;
 
+  private volatile boolean runCBO = true;
+  private volatile boolean disableJoinMerge = false;
+
   /*
    * Capture the CTE definitions in a Query.
    */
@@ -273,6 +279,11 @@ public class SemanticAnalyzer extends Ba
     int nextNum;
   }
 
+  protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException {
+    this(conf);
+    this.runCBO = runCBO;
+  }
+
   public SemanticAnalyzer(HiveConf conf) throws SemanticException {
 
     super(conf);
@@ -323,7 +334,28 @@ public class SemanticAnalyzer extends Ba
     opParseCtx.clear();
     groupOpToInputTables.clear();
     prunedPartitions.clear();
+    disableJoinMerge = false;
     aliasToCTEs.clear();
+    topToTable.clear();
+    opToPartPruner.clear();
+    opToPartList.clear();
+    opToPartToSkewedPruner.clear();
+    opToSamplePruner.clear();
+    nameToSplitSample.clear();
+    fsopToTable.clear();
+    resultSchema = null;
+    createVwDesc = null;
+    viewsExpanded = null;
+    viewSelect = null;
+    ctesExpanded = null;
+    globalLimitCtx.disableOpt();
+    viewAliasToInput.clear();
+    reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
+    topToTableProps.clear();
+    listMapJoinOpsNoReducer.clear();
+    unparseTranslator.clear();
+    queryProperties.clear();
+    outputs.clear();
   }
 
   public void initParseCtx(ParseContext pctx) {
@@ -972,7 +1004,6 @@ public class SemanticAnalyzer extends Ba
             frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
           processLateralView(qb, frm);
         } else if (isJoinToken(frm)) {
-          queryProperties.setHasJoin(true);
           processJoin(qb, frm);
           qbp.setJoinExpr(frm);
         }else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
@@ -1189,6 +1220,10 @@ public class SemanticAnalyzer extends Ba
     }
   }
 
+  public Table getTable(TableScanOperator ts) {
+    return topToTable.get(ts);
+  }
+
   public void getMetaData(QB qb) throws SemanticException {
     getMetaData(qb, null);
   }
@@ -6737,6 +6772,7 @@ public class SemanticAnalyzer extends Ba
       }
       desc.setNullSafes(nullsafes);
     }
+    queryProperties.incrementJoinCount(joinOp.getConf().getNoOuterJoin());
     return putOpInsertMap(joinOp, outputRR);
   }
 
@@ -9146,7 +9182,9 @@ public class SemanticAnalyzer extends Ba
                 aliasToOpInfo );
           }
         }
-        mergeJoinTree(qb);
+
+        if (!disableJoinMerge)
+          mergeJoinTree(qb);
       }
 
       // if any filters are present in the join tree, push them on top of the
@@ -9411,6 +9449,19 @@ public class SemanticAnalyzer extends Ba
     getMetaData(qb);
     LOG.info("Completed getting MetaData in Semantic Analysis");
 
+    if (runCBO) {
+      boolean tokenTypeIsQuery = ast.getToken().getType() == HiveParser.TOK_QUERY
+          || ast.getToken().getType() == HiveParser.TOK_EXPLAIN;
+      if (!tokenTypeIsQuery || createVwDesc != null
+          || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
+        runCBO = false;
+      }
+
+      if (runCBO) {
+        disableJoinMerge = true;
+      }
+    }
+
     // Save the result schema derived from the sink operator produced
     // by genPlan. This has the correct column names, which clients
     // such as JDBC would prefer instead of the c0, c1 we'll end
@@ -9423,6 +9474,96 @@ public class SemanticAnalyzer extends Ba
       resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(),
           HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
 
+    if (runCBO) {
+      /*
+       * For CBO:
+       * 1. Check whether CBO can handle the operator tree.
+       * 2. Run PreCBOOptimizer on the plan. This applies the Partition
+       *    Pruning, Predicate Pushdown, Column Pruning and Stats Annotation
+       *    transformations to the generated plan.
+       * 3. Validate that every TableScan has valid stats.
+       * 4. Hand the plan to CBO, which searches the plan space and returns
+       *    the best plan as an AST.
+       * 5. Run the analysis pipeline on the new AST: Phase 1, Get Metadata,
+       *    Gen Plan.
+       *    a. During plan generation, join merging is disabled so that the
+       *       join order chosen by CBO is not changed.
+       * Error handling: on failure, the analysis is restarted from the
+       * beginning on the original AST, with runCBO set to false.
+       */
+      boolean reAnalyzeAST = false;
+
+      try {
+        // 1. Can CBO handle OP tree
+        if (CostBasedOptimizer.canHandleOpTree(sinkOp, conf, queryProperties)) {
+          ASTNode newAST = null;
+
+          // 2. Set up parse ctx for CBO
+          ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, opToPartList,
+              topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable,
+              topToTableProps, fsopToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap,
+              destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
+              opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
+              opToPartToSkewedPruner, viewAliasToInput,
+              reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties);
+
+          // 3. Run Pre CBO optimizer
+          PreCBOOptimizer preCBOOptm = new PreCBOOptimizer();
+          preCBOOptm.setPctx(pCtx);
+          preCBOOptm.initialize(conf);
+          pCtx = preCBOOptm.optimize();
+
+          // 4. Validate Table Stats
+          CBOTableStatsValidator tableStatsValidator = new CBOTableStatsValidator();
+          if (tableStatsValidator.validStats(sinkOp, pCtx)) {
+
+            // 5. Optimize the plan with CBO & generate optimized AST
+            newAST = CostBasedOptimizer.optimize(sinkOp, this, pCtx, resultSchema);
+            if (LOG.isDebugEnabled()) {
+              String newAstExpanded = newAST.dump();
+              LOG.debug("CBO rewritten query: \n" + newAstExpanded);
+            }
+
+            // 6. Regen OP plan from optimized AST
+            init();
+            ctx_1 = initPhase1Ctx();
+            if (!doPhase1(newAST, qb, ctx_1)) {
+              throw new RuntimeException("Couldn't do phase1 on CBO optimized query plan");
+            }
+            getMetaData(qb);
+
+            disableJoinMerge = true;
+            sinkOp = genPlan(qb);
+
+            /*
+             * Use the non-CBO result set schema so as to preserve
+             * user-specified names. Hive seems to have bugs with
+             * ORDER BY/LIMIT in subqueries.
+             * // 7. Reset result set schema
+             * // resultSchema = convertRowSchemaToResultSetSchema(
+             * //     opParseCtx.get(sinkOp).getRowResolver(), true);
+             */
+          } else {
+            reAnalyzeAST = true;
+            LOG.warn("Skipping CBO. Incomplete column stats for Tables: "
+                + tableStatsValidator.getIncompleteStatsTabNames());
+          }
+        } else {
+          // Need to regen OP tree since join merge was disabled.
+          // TODO: can we just regen the OP tree instead of reanalyzing the AST?
+          if (queryProperties.getJoinCount() > 1)
+            reAnalyzeAST = true;
+          LOG.info("Skipping CBO as CBO can not handle OP tree.");
+        }
+      } catch (Exception e) {
+        reAnalyzeAST = true;
+        LOG.warn("CBO failed, skipping CBO. ", e);
+      } finally {
+        runCBO = false;
+        disableJoinMerge = false;
+        if (reAnalyzeAST) {
+          init();
+          analyzeInternal(ast);
+          return;
+        }
+      }
+    }
+
     ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner,
         opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext,
         topToTable, topToTableProps, fsopToTable,
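
The CBO wiring in the SemanticAnalyzer diff above is a retry-once fallback:
any failure flips reAnalyzeAST, and the finally clause clears runCBO before
re-entering analyzeInternal on the original AST, so the second pass cannot
loop back into CBO. A reduced, self-contained sketch of that control-flow
shape (all names here are illustrative, not the commit's API):

public class CboFallbackSketch {
  private boolean runCBO = true;

  void analyze(String ast) {
    if (runCBO) {
      boolean reAnalyze = false;
      try {
        String optimized = cboRewrite(ast); // throws if CBO cannot handle it
        buildPlan(optimized);               // join merge stays disabled here
        return;
      } catch (Exception e) {
        reAnalyze = true;                   // fall back to the original AST
      } finally {
        runCBO = false;                     // guarantees the retry runs once
        if (reAnalyze) {
          analyze(ast);
          return; // mirrors the commit's early return from within finally
        }
      }
    }
    buildPlan(ast);
  }

  String cboRewrite(String ast) { throw new RuntimeException("unsupported"); }
  void buildPlan(String ast) { System.out.println("plan built for: " + ast); }

  public static void main(String[] args) {
    new CboFallbackSketch().analyze("SELECT ...");
  }
}

Returning from a finally block is normally discouraged, but it reproduces the
commit's behavior: once the fallback analysis completes, control must not fall
through to the CBO-path code that follows the try statement.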

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java?rev=1605013&r1=1605012&r2=1605013&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java Tue Jun 24 06:32:30 2014
@@ -262,4 +262,10 @@ class UnparseTranslator {
     ASTNode targetNode;
     ASTNode sourceNode;
   }
+
+  public void clear() {
+    translations.clear();
+    copyTranslations.clear();
+    enabled = false;
+  }
 }