You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by kw...@apache.org on 2016/09/30 02:14:53 UTC
[36/61] [partial] incubator-impala git commit: IMPALA-3786: Replace
"cloudera" with "apache" (part 1)
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
deleted file mode 100644
index 8304c66..0000000
--- a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java
+++ /dev/null
@@ -1,905 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.analysis;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.cloudera.impala.analysis.AnalysisContext.AnalysisResult;
-import com.cloudera.impala.analysis.UnionStmt.UnionOperand;
-import com.cloudera.impala.common.AnalysisException;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Predicates;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-
-/**
- * Class representing a statement rewriter. A statement rewriter performs subquery
- * unnesting on an analyzed parse tree.
- * TODO: Now that we have a nested-loop join supporting all join modes we could
- * allow more rewrites, although it is not clear we would always want to.
- */
-public class StmtRewriter {
- private final static Logger LOG = LoggerFactory.getLogger(StmtRewriter.class);
-
- /**
- * Rewrite the statement of an analysis result. The unanalyzed rewritten
- * statement is returned.
- */
- public static StatementBase rewrite(AnalysisResult analysisResult)
-     throws AnalysisException {
-   // Top-level analyzed statement; it either is, or wraps, the query statement whose
-   // subqueries are rewritten.
-   final StatementBase stmt = analysisResult.getStmt();
-   Preconditions.checkState(stmt.isAnalyzed());
-
-   // Pick the query statement to rewrite according to the kind of top-level statement.
-   final QueryStmt queryStmt;
-   if (stmt instanceof QueryStmt) {
-     queryStmt = (QueryStmt) stmt;
-   } else if (stmt instanceof InsertStmt) {
-     queryStmt = ((InsertStmt) stmt).getQueryStmt();
-   } else if (stmt instanceof CreateTableAsSelectStmt) {
-     queryStmt = ((CreateTableAsSelectStmt) stmt).getQueryStmt();
-   } else if (analysisResult.isUpdateStmt()) {
-     queryStmt = ((UpdateStmt) stmt).getQueryStmt();
-   } else if (analysisResult.isDeleteStmt()) {
-     queryStmt = ((DeleteStmt) stmt).getQueryStmt();
-   } else {
-     throw new AnalysisException("Unsupported statement containing subqueries: " +
-         stmt.toSql());
-   }
-
-   // The rewrite is done in place on the query statement. reset() is then called on
-   // the top-level statement, which per this method's contract is returned unanalyzed.
-   rewriteQueryStatement(queryStmt, queryStmt.getAnalyzer());
-   stmt.reset();
-   return stmt;
- }
-
- /**
- * Calls the appropriate rewrite method based on the specific type of query stmt. See
- * rewriteSelectStatement() and rewriteUnionStatement() documentation.
- */
- public static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer)
-     throws AnalysisException {
-   Preconditions.checkNotNull(stmt);
-   // Only analyzed statements may be rewritten. This has to be a state check:
-   // checkNotNull() on the autoboxed boolean returned by isAnalyzed() can never fail,
-   // so it silently accepted unanalyzed statements.
-   Preconditions.checkState(stmt.isAnalyzed());
-   // Dispatch on the concrete type of the query statement.
-   if (stmt instanceof SelectStmt) {
-     rewriteSelectStatement((SelectStmt) stmt, analyzer);
-   } else if (stmt instanceof UnionStmt) {
-     rewriteUnionStatement((UnionStmt) stmt, analyzer);
-   } else {
-     // Any other QueryStmt subclass has no rewrite rule.
-     throw new AnalysisException("Subqueries not supported for " +
-         stmt.getClass().getSimpleName() + " statements");
-   }
- }
-
- /**
- * Rewrite all the subqueries of a SelectStmt in place. Subqueries
- * are currently supported in FROM and WHERE clauses. The rewrite is performed in
- * place and not in a clone of SelectStmt because it requires the stmt to be analyzed.
- */
- private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
-     throws AnalysisException {
-   // Rewrite all the subqueries in the FROM clause. Only inline views carry a nested
-   // query statement; each one is rewritten with its own analyzer.
-   for (TableRef tblRef: stmt.fromClause_) {
-     if (!(tblRef instanceof InlineViewRef)) continue;
-     InlineViewRef inlineViewRef = (InlineViewRef) tblRef;
-     rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer());
-   }
-   // Rewrite all the subqueries in the WHERE clause.
-   if (stmt.hasWhereClause()) {
-     // Push negation to leaf operands.
-     stmt.whereClause_ = Expr.pushNegationToOperands(stmt.whereClause_);
-     // Check if we can rewrite the subqueries in the WHERE clause. OR predicates with
-     // subqueries are not supported.
-     if (hasSubqueryInDisjunction(stmt.whereClause_)) {
-       throw new AnalysisException("Subqueries in OR predicates are not supported: " +
-           stmt.whereClause_.toSql());
-     }
-     rewriteWhereClauseSubqueries(stmt, analyzer);
-   }
-   // NOTE(review): sqlString_ is presumably a cached SQL string that is cleared here so
-   // that toSql() reflects the rewritten statement -- confirm against SelectStmt.
-   stmt.sqlString_ = null;
-   // Building the SQL text of the rewritten statement is wasted work unless trace
-   // logging is actually enabled, so guard the call.
-   if (LOG.isTraceEnabled()) LOG.trace("rewritten stmt: " + stmt.toSql());
- }
-
- /**
- * Rewrite all operands in a UNION. The conditions that apply to SelectStmt rewriting
- * also apply here.
- */
- private static void rewriteUnionStatement(UnionStmt stmt, Analyzer analyzer)
-     throws AnalysisException {
-   // Every operand is expected to be a SelectStmt and is rewritten with the operand's
-   // own analyzer; the 'analyzer' argument is not used here.
-   for (UnionOperand unionOperand: stmt.getOperands()) {
-     QueryStmt operandStmt = unionOperand.getQueryStmt();
-     Preconditions.checkState(operandStmt instanceof SelectStmt);
-     rewriteSelectStatement((SelectStmt) operandStmt, unionOperand.getAnalyzer());
-   }
- }
-
- /**
- * Returns true if the Expr tree rooted at 'expr' has at least one subquery
- * that participates in a disjunction.
- */
- private static boolean hasSubqueryInDisjunction(Expr expr) {
-   // Anything that is not a CompoundPredicate is reported as not having a subquery in
-   // a disjunction; its children are not inspected.
-   boolean isCompound = expr instanceof CompoundPredicate;
-   if (!isCompound) return false;
-   // At an OR predicate the answer is whether a subquery occurs anywhere below it.
-   if (Expr.IS_OR_PREDICATE.apply(expr)) return expr.contains(Subquery.class);
-   // Otherwise keep searching the operands, stopping at the first hit.
-   boolean foundInOperand = false;
-   for (Expr operand: expr.getChildren()) {
-     foundInOperand = hasSubqueryInDisjunction(operand);
-     if (foundInOperand) break;
-   }
-   return foundInOperand;
- }
-
- /**
- * Rewrite all subqueries of a stmt's WHERE clause. Initially, all the
- * conjuncts containing subqueries are extracted from the WHERE clause and are
- * replaced with true BoolLiterals. Subsequently, each extracted conjunct is
- * merged into its parent select block by converting it into a join.
- * Conjuncts with subqueries that themselves contain conjuncts with subqueries are
- * recursively rewritten in a bottom up fashion.
- *
- * The following example illustrates the bottom up rewriting of nested queries.
- * Suppose we have the following three level nested query Q0:
- *
- * SELECT *
- * FROM T1 : Q0
- * WHERE T1.a IN (SELECT a
- * FROM T2 WHERE T2.b IN (SELECT b
- * FROM T3))
- * AND T1.c < 10;
- *
- * This query will be rewritten as follows. Initially, the IN predicate
- * T1.a IN (SELECT a FROM T2 WHERE T2.b IN (SELECT b FROM T3)) is extracted
- * from the top level block (Q0) since it contains a subquery and is
- * replaced by a true BoolLiteral, resulting in the following query Q1:
- *
- * SELECT * FROM T1 WHERE TRUE : Q1
- *
- * Since the stmt in the extracted predicate contains a conjunct with a subquery,
- * it is also rewritten. As before, rewriting stmt SELECT a FROM T2
- * WHERE T2.b IN (SELECT b FROM T3) works by first extracting the conjunct that
- * contains the subquery (T2.b IN (SELECT b FROM T3)) and substituting it with
- * a true BoolLiteral, producing the following stmt Q2:
- *
- * SELECT a FROM T2 WHERE TRUE : Q2
- *
- * The predicate T2.b IN (SELECT b FROM T3) is then merged with Q2,
- * producing the following unnested query Q3:
- *
- * SELECT a FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1 ON T2.b = $a$1.b : Q3
- *
- * The extracted IN predicate becomes:
- *
- * T1.a IN (SELECT a FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1 ON T2.b = $a$1.b)
- *
- * Finally, the rewritten IN predicate is merged with query block Q1,
- * producing the following unnested query (WHERE clauses that contain only
- * conjunctions of true BoolLiterals are eliminated):
- *
- * SELECT *
- * FROM T1 LEFT SEMI JOIN (SELECT a
- * FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1
- * ON T2.b = $a$1.b) $a$1
- * ON $a$1.a = T1.a
- * WHERE T1.c < 10;
- *
- */
- private static void rewriteWhereClauseSubqueries(SelectStmt stmt, Analyzer analyzer)
- throws AnalysisException {
- // Number of table refs before any inline view is appended by mergeExpr(). It is
- // passed on as the upper bound for star expansion in replaceUnqualifiedStarItems().
- int numTableRefs = stmt.fromClause_.size();
- // Conjuncts that still need to be merged into 'stmt' as joins.
- ArrayList<Expr> exprsWithSubqueries = Lists.newArrayList();
- // Maps every handled conjunct to the BoolLiteral that replaces it in the WHERE clause.
- ExprSubstitutionMap smap = new ExprSubstitutionMap();
- // Replace all BetweenPredicates with their equivalent compound predicates.
- stmt.whereClause_ = rewriteBetweenPredicates(stmt.whereClause_);
- // Check if all the conjuncts in the WHERE clause that contain subqueries
- // can currently be rewritten as a join.
- for (Expr conjunct: stmt.whereClause_.getConjuncts()) {
- List<Subquery> subqueries = Lists.newArrayList();
- conjunct.collectAll(Predicates.instanceOf(Subquery.class), subqueries);
- // Conjuncts without subqueries stay in the WHERE clause untouched.
- if (subqueries.size() == 0) continue;
- if (subqueries.size() > 1) {
- throw new AnalysisException("Multiple subqueries are not supported in " +
- "expression: " + conjunct.toSql());
- }
- // Only IN, EXISTS and binary predicates, or conjuncts for which
- // Expr.IS_SCALAR_SUBQUERY matches, are accepted.
- if (!(conjunct instanceof InPredicate) && !(conjunct instanceof ExistsPredicate) &&
- !(conjunct instanceof BinaryPredicate) &&
- !conjunct.contains(Expr.IS_SCALAR_SUBQUERY)) {
- throw new AnalysisException("Non-scalar subquery is not supported in " +
- "expression: " + conjunct.toSql());
- }
-
- if (conjunct instanceof ExistsPredicate) {
- // Check if we can determine the result of an ExistsPredicate during analysis.
- // If so, replace the predicate with a BoolLiteral predicate and remove it from
- // the list of predicates to be rewritten.
- BoolLiteral boolLiteral = replaceExistsPredicate((ExistsPredicate) conjunct);
- if (boolLiteral != null) {
- boolLiteral.analyze(analyzer);
- smap.put(conjunct, boolLiteral);
- continue;
- }
- }
-
- // Replace all the supported exprs with subqueries with true BoolLiterals
- // using an smap.
- BoolLiteral boolLiteral = new BoolLiteral(true);
- boolLiteral.analyze(analyzer);
- smap.put(conjunct, boolLiteral);
- exprsWithSubqueries.add(conjunct);
- }
- // Apply all replacements to the WHERE clause in a single substitution.
- stmt.whereClause_ = stmt.whereClause_.substitute(smap, analyzer, false);
-
- boolean hasNewVisibleTuple = false;
- // Recursively rewrite all the exprs that contain subqueries and merge them
- // with 'stmt'.
- for (Expr expr: exprsWithSubqueries) {
- if (mergeExpr(stmt, rewriteExpr(expr, analyzer), analyzer)) {
- hasNewVisibleTuple = true;
- }
- }
- // Drop a WHERE clause that only consists of true BoolLiterals.
- if (canEliminate(stmt.whereClause_)) stmt.whereClause_ = null;
- // A merge that added a visible tuple would change what an unqualified '*' expands
- // to, so such items are replaced with explicit per-table star items.
- if (hasNewVisibleTuple) replaceUnqualifiedStarItems(stmt, numTableRefs);
- }
-
- /**
- * Replace an ExistsPredicate that contains a subquery with a BoolLiteral if we
- * can determine its result without evaluating it. Return null if the result of the
- * ExistsPredicate can only be determined at run-time.
- */
- private static BoolLiteral replaceExistsPredicate(ExistsPredicate predicate) {
-   Subquery subquery = predicate.getSubquery();
-   Preconditions.checkNotNull(subquery);
-   SelectStmt subqueryStmt = (SelectStmt) subquery.getStatement();
-
-   // The subquery's analyzer reports an empty result set: the literal is false for
-   // EXISTS and true for NOT EXISTS.
-   if (subqueryStmt.getAnalyzer().hasEmptyResultSet()) {
-     return new BoolLiteral(predicate.isNotExists());
-   }
-
-   // The subquery has aggregate exprs but neither analytic info nor a HAVING
-   // predicate: the literal is true for EXISTS and false for NOT EXISTS.
-   boolean hasAggExprs =
-       subqueryStmt.hasAggInfo() && subqueryStmt.getAggInfo().hasAggregateExprs();
-   boolean isPlainAggregate = hasAggExprs && !subqueryStmt.hasAnalyticInfo()
-       && subqueryStmt.getHavingPred() == null;
-   if (isPlainAggregate) return new BoolLiteral(!predicate.isNotExists());
-
-   // The outcome can only be determined at run-time.
-   return null;
- }
-
- /**
- * Replace all BetweenPredicates with their equivalent compound predicates from the
- * expr tree rooted at 'expr'. The modified expr tree is returned.
- */
- private static Expr rewriteBetweenPredicates(Expr expr) {
-   // A BETWEEN predicate is replaced as a whole by its rewritten form; its children
-   // are not visited.
-   if (expr instanceof BetweenPredicate) {
-     BetweenPredicate between = (BetweenPredicate) expr;
-     return between.getRewrittenPredicate();
-   }
-   // Any other expr is kept, with each child slot overwritten by its rewritten child.
-   int childIdx = 0;
-   while (childIdx < expr.getChildren().size()) {
-     Expr rewrittenChild = rewriteBetweenPredicates(expr.getChild(childIdx));
-     expr.setChild(childIdx, rewrittenChild);
-     ++childIdx;
-   }
-   return expr;
- }
-
- /**
- * Modifies in place an expr that contains a subquery by rewriting its
- * subquery stmt. The modified analyzed expr is returned.
- */
- private static Expr rewriteExpr(Expr expr, Analyzer analyzer)
-     throws AnalysisException {
-   // Rewrite the statement of the expr's subquery in place, using the subquery's own
-   // analyzer.
-   Subquery origSubquery = expr.getSubquery();
-   Preconditions.checkNotNull(origSubquery);
-   QueryStmt subqueryStmt = origSubquery.getStatement();
-   rewriteSelectStatement((SelectStmt) subqueryStmt, origSubquery.getAnalyzer());
-
-   // Wrap a reset clone of the rewritten statement in a new Subquery and analyze it
-   // with the analyzer passed in by the caller.
-   QueryStmt clonedStmt = subqueryStmt.clone();
-   clonedStmt.reset();
-   Subquery replacement = new Subquery(clonedStmt);
-   replacement.analyze(analyzer);
-
-   // Swap the original subquery for the new one inside 'expr'.
-   ExprSubstitutionMap subqueryMap = new ExprSubstitutionMap();
-   subqueryMap.put(origSubquery, replacement);
-   return expr.substitute(subqueryMap, analyzer, false);
- }
-
- /**
- * Merge an expr containing a subquery with a SelectStmt 'stmt' by
- * converting the subquery stmt of the former into an inline view and
- * creating a join between the new inline view and the right-most table
- * from 'stmt'. Return true if the rewrite introduced a new visible tuple
- * due to a CROSS JOIN or a LEFT OUTER JOIN.
- *
- * This process works as follows:
- * 1. Create a new inline view with the subquery as the view's stmt. Changes
- * made to the subquery's stmt will affect the inline view.
- * 2. Extract all correlated predicates from the subquery's WHERE
- * clause; the subquery's select list may be extended with new items and a
- * GROUP BY clause may be added.
- * 3. Add the inline view to stmt's tableRefs and create a
- * join (left semi join, anti-join, left outer join for agg functions
- * that return a non-NULL value for an empty input, or cross-join) with
- * stmt's right-most table.
- * 4. Initialize the ON clause of the new join from the original subquery
- * predicate and the new inline view.
- * 5. Apply expr substitutions such that the extracted correlated predicates
- * refer to columns of the new inline view.
- * 6. Add all extracted correlated predicates to the ON clause.
- */
- private static boolean mergeExpr(SelectStmt stmt, Expr expr,
- Analyzer analyzer) throws AnalysisException {
- Preconditions.checkNotNull(expr);
- Preconditions.checkNotNull(analyzer);
- // Return value of the regular (non-early-return) path; only set to true when the
- // rewrite switches to a LEFT OUTER JOIN below.
- boolean updateSelectList = false;
-
- SelectStmt subqueryStmt = (SelectStmt)expr.getSubquery().getStatement();
- // Create a new inline view from the subquery stmt. The inline view will be added
- // to the stmt's table refs later. Explicitly set the inline view's column labels
- // to eliminate any chance that column aliases from the parent query could reference
- // select items from the inline view after the rewrite.
- List<String> colLabels = Lists.newArrayList();
- for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
- colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
- }
- InlineViewRef inlineView = new InlineViewRef(
- stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt, colLabels);
-
- // Extract all correlated predicates from the subquery.
- List<Expr> onClauseConjuncts = extractCorrelatedPredicates(subqueryStmt);
- if (!onClauseConjuncts.isEmpty()) {
- // Throws if the correlated subquery is not eligible for the join rewrite.
- canRewriteCorrelatedSubquery(expr, onClauseConjuncts);
- // For correlated subqueries that are eligible for rewrite by transforming
- // into a join, a LIMIT clause has no effect on the results, so we can
- // safely remove it.
- subqueryStmt.limitElement_ = new LimitElement(null, null);
- }
-
- // Update the subquery's select list and/or its GROUP BY clause by adding
- // exprs from the extracted correlated predicates.
- boolean updateGroupBy = expr.getSubquery().isScalarSubquery()
- || (expr instanceof ExistsPredicate
- && !subqueryStmt.getSelectList().isDistinct()
- && subqueryStmt.hasAggInfo());
- // Parallel lists filled by updateInlineView(): lhsExprs.get(i) is an expr of the
- // subquery and rhsExprs.get(i) is the (not yet analyzed) SlotRef on the inline view
- // that replaces it.
- List<Expr> lhsExprs = Lists.newArrayList();
- List<Expr> rhsExprs = Lists.newArrayList();
- for (Expr conjunct: onClauseConjuncts) {
- updateInlineView(inlineView, conjunct, stmt.getTableRefIds(),
- lhsExprs, rhsExprs, updateGroupBy);
- }
-
- // Analyzing the inline view triggers reanalysis of the subquery's select statement.
- // However the statement is already analyzed and since statement analysis is not
- // idempotent, the analysis needs to be reset.
- inlineView.reset();
- inlineView.analyze(analyzer);
- // The inline view is appended after the currently right-most table ref of 'stmt'.
- inlineView.setLeftTblRef(stmt.fromClause_.get(stmt.fromClause_.size() - 1));
- stmt.fromClause_.add(inlineView);
- // Default join operator; it may be replaced further down.
- JoinOperator joinOp = JoinOperator.LEFT_SEMI_JOIN;
-
- // Create a join conjunct from the expr that contains a subquery.
- Expr joinConjunct = createJoinConjunct(expr, inlineView, analyzer,
- !onClauseConjuncts.isEmpty());
- if (joinConjunct != null) {
- SelectListItem firstItem =
- ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
- if (!onClauseConjuncts.isEmpty() &&
- firstItem.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
- // Correlated subqueries with an aggregate function that returns non-null on
- // an empty input are rewritten using a LEFT OUTER JOIN because we
- // need to ensure that there is one agg value for every tuple of 'stmt'
- // (parent select block), even for those tuples of 'stmt' that get rejected
- // by the subquery due to some predicate. The new join conjunct is added to
- // stmt's WHERE clause because it needs to be applied to the result of the
- // LEFT OUTER JOIN (both matched and unmatched tuples).
- //
- // TODO Handle other aggregate functions and UDAs that return a non-NULL value
- // on an empty set.
- // TODO Handle count aggregate functions in an expression in subqueries
- // select list.
- stmt.whereClause_ =
- CompoundPredicate.createConjunction(joinConjunct, stmt.whereClause_);
- // The conjunct now lives in the WHERE clause, so it must not be added to the
- // ON clause as well.
- joinConjunct = null;
- joinOp = JoinOperator.LEFT_OUTER_JOIN;
- updateSelectList = true;
- }
-
- if (joinConjunct != null) onClauseConjuncts.add(joinConjunct);
- }
-
- // Create the ON clause from the extracted correlated predicates.
- Expr onClausePredicate =
- CompoundPredicate.createConjunctivePredicate(onClauseConjuncts);
-
- // NOTE(review): presumably createConjunctivePredicate() returns null when
- // 'onClauseConjuncts' is empty -- confirm against CompoundPredicate. This path is
- // only expected for EXISTS / NOT EXISTS predicates (see the state check).
- if (onClausePredicate == null) {
- Preconditions.checkState(expr instanceof ExistsPredicate);
- ExistsPredicate existsPred = (ExistsPredicate) expr;
- // TODO This is very expensive if uncorrelated. Remove it when we implement
- // independent subquery evaluation.
- if (existsPred.isNotExists()) {
- inlineView.setJoinOp(JoinOperator.LEFT_ANTI_JOIN);
- } else {
- inlineView.setJoinOp(JoinOperator.LEFT_SEMI_JOIN);
- }
- // Note that the concept of a 'correlated inline view' is similar but not the same
- // as a 'correlated subquery', i.e., a subquery with a correlated predicate.
- if (!inlineView.isCorrelated()) {
- // For uncorrelated subqueries, we limit the number of rows returned by the
- // subquery.
- subqueryStmt.setLimit(1);
- inlineView.setOnClause(new BoolLiteral(true));
- }
- // A semi/anti join is used on this path, so no new visible tuple is reported.
- return false;
- }
-
- // Create an smap from the original select-list exprs of the select list to
- // the corresponding inline-view columns.
- ExprSubstitutionMap smap = new ExprSubstitutionMap();
- Preconditions.checkState(lhsExprs.size() == rhsExprs.size());
- for (int i = 0; i < lhsExprs.size(); ++i) {
- Expr lhsExpr = lhsExprs.get(i);
- Expr rhsExpr = rhsExprs.get(i);
- // The inline view has been analyzed by now, so its SlotRefs can be analyzed too.
- rhsExpr.analyze(analyzer);
- smap.put(lhsExpr, rhsExpr);
- }
- onClausePredicate = onClausePredicate.substitute(smap, analyzer, false);
-
- // Check for references to ancestor query blocks (cycles in the dependency
- // graph of query blocks are not supported).
- if (!onClausePredicate.isBoundByTupleIds(stmt.getTableRefIds())) {
- throw new AnalysisException("Unsupported correlated subquery: " +
- subqueryStmt.toSql());
- }
-
- // Check if we have a valid ON clause for an equi-join.
- boolean hasEqJoinPred = false;
- for (Expr conjunct: onClausePredicate.getConjuncts()) {
- if (!(conjunct instanceof BinaryPredicate)) continue;
- BinaryPredicate.Operator operator = ((BinaryPredicate) conjunct).getOp();
- if (!operator.isEquivalence()) continue;
- // Both sides of the predicate must reference at least one tuple.
- List<TupleId> lhsTupleIds = Lists.newArrayList();
- conjunct.getChild(0).getIds(lhsTupleIds, null);
- if (lhsTupleIds.isEmpty()) continue;
- List<TupleId> rhsTupleIds = Lists.newArrayList();
- conjunct.getChild(1).getIds(rhsTupleIds, null);
- if (rhsTupleIds.isEmpty()) continue;
- // Check if columns from the outer query block (stmt) appear in both sides
- // of the binary predicate.
- if ((lhsTupleIds.contains(inlineView.getDesc().getId()) && lhsTupleIds.size() > 1)
- || (rhsTupleIds.contains(inlineView.getDesc().getId())
- && rhsTupleIds.size() > 1)) {
- continue;
- }
- hasEqJoinPred = true;
- break;
- }
-
- if (!hasEqJoinPred && !inlineView.isCorrelated()) {
- // TODO: Remove this when independent subquery evaluation is implemented.
- // TODO: Requires support for non-equi joins.
- boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause();
- // The second operand below is logically equivalent to
- // 'hasGroupBy && !stmt.selectList_.isDistinct()'.
- if (!expr.getSubquery().isScalarSubquery() ||
- (!(hasGroupBy && stmt.selectList_.isDistinct()) && hasGroupBy)) {
- throw new AnalysisException("Unsupported predicate with subquery: " +
- expr.toSql());
- }
-
- // TODO: Requires support for null-aware anti-join mode in nested-loop joins
- // The isScalarSubquery() test below is redundant when reached: the previous check
- // already throws for non-scalar subqueries.
- if (expr.getSubquery().isScalarSubquery() && expr instanceof InPredicate
- && ((InPredicate) expr).isNotIn()) {
- throw new AnalysisException("Unsupported NOT IN predicate with subquery: " +
- expr.toSql());
- }
-
- // We can rewrite the aggregate subquery using a cross join. All conjuncts
- // that were extracted from the subquery are added to stmt's WHERE clause.
- stmt.whereClause_ =
- CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause_);
- inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
- // Indicate that the CROSS JOIN may add a new visible tuple to stmt's
- // select list (if the latter contains an unqualified star item '*')
- return true;
- }
-
- // We have a valid equi-join conjunct or the inline view is correlated.
- if (expr instanceof InPredicate && ((InPredicate)expr).isNotIn() ||
- expr instanceof ExistsPredicate && ((ExistsPredicate)expr).isNotExists()) {
- // For the case of a NOT IN with an eq join conjunct, replace the join
- // conjunct with a conjunct that uses the null-matching eq operator.
- if (expr instanceof InPredicate) {
- joinOp = JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN;
- List<TupleId> tIds = Lists.newArrayList();
- // NOTE(review): 'joinConjunct' is dereferenced without a null check. It is null if
- // createJoinConjunct() returned null or if the LEFT OUTER JOIN branch above
- // cleared it; presumably neither can happen for NOT IN -- confirm.
- joinConjunct.getIds(tIds, null);
- // The join conjunct must reference the inline view and at least one other tuple.
- if (tIds.size() <= 1 || !tIds.contains(inlineView.getDesc().getId())) {
- throw new AnalysisException("Unsupported NOT IN predicate with subquery: " +
- expr.toSql());
- }
- // Replace the EQ operator in the generated join conjunct with a
- // null-matching EQ operator.
- for (Expr conjunct: onClausePredicate.getConjuncts()) {
- if (conjunct.equals(joinConjunct)) {
- Preconditions.checkState(conjunct instanceof BinaryPredicate);
- BinaryPredicate binaryPredicate = (BinaryPredicate)conjunct;
- Preconditions.checkState(binaryPredicate.getOp().isEquivalence());
- binaryPredicate.setOp(BinaryPredicate.Operator.NULL_MATCHING_EQ);
- break;
- }
- }
- } else {
- // NOT EXISTS
- joinOp = JoinOperator.LEFT_ANTI_JOIN;
- }
- }
- inlineView.setJoinOp(joinOp);
- inlineView.setOnClause(onClausePredicate);
- return updateSelectList;
- }
-
- /**
- * Replace all unqualified star exprs ('*') from stmt's select list with qualified
- * ones, i.e. tbl_1.*,...,tbl_n.*, where tbl_1,...,tbl_n are the visible tablerefs
- * in stmt. 'tableIdx' is the exclusive upper bound on the tableRef ordinals that are
- * considered when replacing an unqualified star item.
- */
- private static void replaceUnqualifiedStarItems(SelectStmt stmt, int tableIdx) {
-   Preconditions.checkState(tableIdx < stmt.fromClause_.size());
-   ArrayList<SelectListItem> expandedItems = Lists.newArrayList();
-   for (SelectListItem selectItem: stmt.selectList_.getItems()) {
-     // Only a star item without a raw path is expanded; everything else is kept as is.
-     boolean isUnqualifiedStar = selectItem.isStar() && selectItem.getRawPath() == null;
-     if (!isUnqualifiedStar) {
-       expandedItems.add(selectItem);
-       continue;
-     }
-     // Emit one qualified star item per table ref below 'tableIdx', skipping table refs
-     // joined with LEFT SEMI JOIN or LEFT ANTI JOIN.
-     for (int refIdx = 0; refIdx < tableIdx; ++refIdx) {
-       TableRef ref = stmt.fromClause_.get(refIdx);
-       JoinOperator refJoinOp = ref.getJoinOp();
-       boolean isSemiOrAntiJoin = refJoinOp == JoinOperator.LEFT_SEMI_JOIN
-           || refJoinOp == JoinOperator.LEFT_ANTI_JOIN;
-       if (isSemiOrAntiJoin) continue;
-       List<String> qualifier = Lists.newArrayList(ref.getUniqueAlias());
-       expandedItems.add(SelectListItem.createStarItem(qualifier));
-     }
-   }
-   Preconditions.checkState(!expandedItems.isEmpty());
-   // Rebuild the select list, carrying over the DISTINCT flag and the plan hints.
-   boolean keepDistinct = stmt.selectList_.isDistinct();
-   stmt.selectList_ =
-       new SelectList(expandedItems, keepDistinct, stmt.selectList_.getPlanHints());
- }
-
- /**
- * Return true if the Expr tree rooted at 'expr' can be safely
- * eliminated, i.e. it only consists of conjunctions of true BoolLiterals.
- */
- private static boolean canEliminate(Expr expr) {
-   // True iff every conjunct of 'expr' is a true BoolLiteral. Iterables.all() stops at
-   // the first conjunct that fails the predicate.
-   return Iterables.all(expr.getConjuncts(), Expr.IS_TRUE_LITERAL);
- }
-
- /**
- * Extract all correlated predicates of a subquery.
- *
- * TODO Handle correlated predicates in a HAVING clause.
- */
- private static ArrayList<Expr> extractCorrelatedPredicates(SelectStmt subqueryStmt)
-     throws AnalysisException {
-   List<TupleId> subqueryTids = subqueryStmt.getTableRefIds();
-   ArrayList<Expr> extracted = Lists.newArrayList();
-
-   if (subqueryStmt.hasWhereClause()) {
-     if (!canExtractCorrelatedPredicates(subqueryStmt.getWhereClause(), subqueryTids)) {
-       throw new AnalysisException("Disjunctions with correlated predicates " +
-           "are not supported: " + subqueryStmt.getWhereClause().toSql());
-     }
-     // Pull the correlated predicates out of the WHERE clause, leaving true
-     // BoolLiterals behind; a WHERE clause made only of those is dropped.
-     Expr remainingWhere = extractCorrelatedPredicates(subqueryStmt.getWhereClause(),
-         subqueryTids, extracted);
-     subqueryStmt.setWhereClause(canEliminate(remainingWhere) ? null : remainingWhere);
-   }
-
-   // Do the same for the ON clause of every table ref of the subquery.
-   for (TableRef ref: subqueryStmt.getTableRefs()) {
-     Expr onClause = ref.getOnClause();
-     if (onClause == null) continue;
-
-     ArrayList<Expr> fromOnClause = Lists.newArrayList();
-     Expr remainingOn = extractCorrelatedPredicates(onClause, subqueryTids, fromOnClause);
-     if (fromOnClause.isEmpty()) continue;
-
-     extracted.addAll(fromOnClause);
-     if (!canEliminate(remainingOn)) {
-       ref.setOnClause(remainingOn);
-       continue;
-     }
-     // Nothing but true BoolLiterals is left in the ON clause: remove it and turn the
-     // join into a CROSS JOIN.
-     ref.setJoinOp(JoinOperator.CROSS_JOIN);
-     ref.setOnClause(null);
-   }
-   return extracted;
- }
-
- /**
- * Extract all correlated predicates from the expr tree rooted at 'root' and
- * replace them with true BoolLiterals. The modified expr tree is returned
- * and the extracted correlated predicates are added to 'matches'.
- */
- private static Expr extractCorrelatedPredicates(Expr root, List<TupleId> tupleIds,
-     ArrayList<Expr> matches) {
-   if (!isCorrelatedPredicate(root, tupleIds)) {
-     // 'root' itself is not a correlated predicate: process each child and store the
-     // result back into the same child slot.
-     for (int childIdx = 0; childIdx < root.getChildren().size(); ++childIdx) {
-       Expr rewrittenChild =
-           extractCorrelatedPredicates(root.getChild(childIdx), tupleIds, matches);
-       root.getChildren().set(childIdx, rewrittenChild);
-     }
-     return root;
-   }
-   // 'root' is a correlated predicate: record it and put a true literal in its place.
-   matches.add(root);
-   return new BoolLiteral(true);
- }
-
- /**
- * Checks if an expr containing a correlated subquery is eligible for rewrite by
- * transforming into a join. 'correlatedPredicates' contains the correlated
- * predicates identified in the subquery. Throws an AnalysisException if 'expr'
- * is not eligible for rewrite.
- * TODO: Merge all the rewrite eligibility tests into a single function.
- */
- private static void canRewriteCorrelatedSubquery(Expr expr,
- List<Expr> correlatedPredicates) throws AnalysisException {
- // Despite the 'can...' name this method returns nothing: it returns normally when
- // 'expr' is eligible and throws an AnalysisException otherwise.
- Preconditions.checkNotNull(expr);
- Preconditions.checkNotNull(correlatedPredicates);
- Preconditions.checkState(expr.contains(Subquery.class));
- SelectStmt stmt = (SelectStmt) expr.getSubquery().getStatement();
- Preconditions.checkNotNull(stmt);
- // Grouping and/or aggregation is not allowed on correlated scalar and IN subqueries
- // Concretely: a BinaryPredicate is rejected if the subquery has a GROUP BY clause or
- // analytic info; an InPredicate is rejected if it has agg info or analytic info.
- if ((expr instanceof BinaryPredicate
- && (stmt.hasGroupByClause() || stmt.hasAnalyticInfo()))
- || (expr instanceof InPredicate
- && (stmt.hasAggInfo() || stmt.hasAnalyticInfo()))) {
- throw new AnalysisException("Unsupported correlated subquery with grouping " +
- "and/or aggregation: " + stmt.toSql());
- }
-
- // Matches exprs for which unwrapSlotRef(false) yields a non-null result.
- final com.google.common.base.Predicate<Expr> isSingleSlotRef =
- new com.google.common.base.Predicate<Expr>() {
- @Override
- public boolean apply(Expr arg) { return arg.unwrapSlotRef(false) != null; }
- };
-
- // A HAVING clause is only allowed on correlated EXISTS subqueries with
- // correlated binary predicates of the form Slot = Slot (see IMPALA-2734)
- // TODO Handle binary predicates with IS NOT DISTINCT op
- // NOTE(review): the check below accepts each correlated predicate that either
- // satisfies Expr.IS_EQ_BINARY_PREDICATE or is a single slot ref; whether that is
- // exactly the 'Slot = Slot' form described above depends on IS_EQ_BINARY_PREDICATE
- // -- confirm against Expr.
- if (expr instanceof ExistsPredicate && stmt.hasHavingClause()
- && !correlatedPredicates.isEmpty()
- && (!stmt.hasAggInfo()
- || !Iterables.all(correlatedPredicates,
- Predicates.or(Expr.IS_EQ_BINARY_PREDICATE, isSingleSlotRef)))) {
- throw new AnalysisException("Unsupported correlated EXISTS subquery with a " +
- "HAVING clause: " + stmt.toSql());
- }
-
- // The following correlated subqueries with a limit clause are supported:
- // 1. EXISTS subqueries
- // 2. Scalar subqueries with aggregation
- // In code terms: with a LIMIT, anything that is not an ExistsPredicate must be a
- // BinaryPredicate over a subquery that has agg info and a non-DISTINCT select list.
- if (stmt.hasLimit() &&
- (!(expr instanceof BinaryPredicate) || !stmt.hasAggInfo() ||
- stmt.selectList_.isDistinct()) &&
- !(expr instanceof ExistsPredicate)) {
- throw new AnalysisException("Unsupported correlated subquery with a " +
- "LIMIT clause: " + stmt.toSql());
- }
- }
-
- /**
- * Update the subquery within an inline view by expanding its select list with exprs
- * from a correlated predicate 'expr' that will be 'moved' to an ON clause in the
- * subquery's parent query block. We need to make sure that every expr extracted from
- * the subquery references an item in the subquery's select list. If 'updateGroupBy'
- * is true, the exprs extracted from 'expr' are also added in stmt's GROUP BY clause.
- * Throws an AnalysisException if we need to update the GROUP BY clause but
- * both the lhs and rhs of 'expr' reference a tuple of the subquery stmt.
- */
- private static void updateInlineView(InlineViewRef inlineView, Expr expr,
- List<TupleId> parentQueryTids, List<Expr> lhsExprs, List<Expr> rhsExprs,
- boolean updateGroupBy) throws AnalysisException {
- SelectStmt stmt = (SelectStmt)inlineView.getViewStmt();
- List<TupleId> subqueryTblIds = stmt.getTableRefIds();
- ArrayList<Expr> groupByExprs = null;
- if (updateGroupBy) groupByExprs = Lists.newArrayList();
-
- List<SelectListItem> items = stmt.selectList_.getItems();
- // Collect all the SlotRefs from 'expr' and identify those that are bound by
- // subquery tuple ids.
- ArrayList<Expr> slotRefs = Lists.newArrayList();
- expr.collectAll(Predicates.instanceOf(SlotRef.class), slotRefs);
- List<Expr> exprsBoundBySubqueryTids = Lists.newArrayList();
- for (Expr slotRef: slotRefs) {
- if (slotRef.isBoundByTupleIds(subqueryTblIds)) {
- exprsBoundBySubqueryTids.add(slotRef);
- }
- }
- // The correlated predicate only references slots from a parent block,
- // no need to update the subquery's select or group by list.
- if (exprsBoundBySubqueryTids.isEmpty()) return;
- if (updateGroupBy) {
- Preconditions.checkState(expr instanceof BinaryPredicate);
- Expr exprBoundBySubqueryTids = null;
- if (exprsBoundBySubqueryTids.size() > 1) {
- // If the predicate contains multiple SlotRefs bound by subquery tuple
- // ids, they must all be on the same side of that predicate.
- if (expr.getChild(0).isBoundByTupleIds(subqueryTblIds) &&
- expr.getChild(1).isBoundByTupleIds(parentQueryTids)) {
- exprBoundBySubqueryTids = expr.getChild(0);
- } else if (expr.getChild(0).isBoundByTupleIds(parentQueryTids) &&
- expr.getChild(1).isBoundByTupleIds(subqueryTblIds)) {
- exprBoundBySubqueryTids = expr.getChild(1);
- } else {
- throw new AnalysisException("All subquery columns " +
- "that participate in a predicate must be on the same side of " +
- "that predicate: " + expr.toSql());
- }
- } else {
- Preconditions.checkState(exprsBoundBySubqueryTids.size() == 1);
- exprBoundBySubqueryTids = exprsBoundBySubqueryTids.get(0);
- }
- exprsBoundBySubqueryTids.clear();
- exprsBoundBySubqueryTids.add(exprBoundBySubqueryTids);
- }
-
- // Add the exprs bound by subquery tuple ids to the select list and
- // register it for substitution. We use a temporary substitution map
- // because we cannot at this point analyze the new select list expr. Once
- // the new inline view is analyzed, the entries from this map will be
- // added to an ExprSubstitutionMap.
- for (Expr boundExpr: exprsBoundBySubqueryTids) {
- String colAlias = stmt.getColumnAliasGenerator().getNextAlias();
- items.add(new SelectListItem(boundExpr, null));
- inlineView.getExplicitColLabels().add(colAlias);
- lhsExprs.add(boundExpr);
- rhsExprs.add(new SlotRef(Lists.newArrayList(inlineView.getUniqueAlias(), colAlias)));
- if (groupByExprs != null) groupByExprs.add(boundExpr);
- }
-
- // Update the subquery's select list.
- boolean isDistinct = stmt.selectList_.isDistinct();
- stmt.selectList_ = new SelectList(
- items, isDistinct, stmt.selectList_.getPlanHints());
- // Update subquery's GROUP BY clause
- if (groupByExprs != null && !groupByExprs.isEmpty()) {
- if (stmt.hasGroupByClause()) {
- stmt.groupingExprs_.addAll(groupByExprs);
- } else {
- stmt.groupingExprs_ = groupByExprs;
- }
- }
- }
-
- /**
- * Returns true if we can extract the correlated predicates from 'expr'. A
- * correlated predicate cannot be extracted if it is part of a disjunction.
- */
- private static boolean canExtractCorrelatedPredicates(Expr expr,
- List<TupleId> subqueryTupleIds) {
- if (!(expr instanceof CompoundPredicate)) return true;
- if (Expr.IS_OR_PREDICATE.apply(expr)) {
- return !containsCorrelatedPredicate(expr, subqueryTupleIds);
- }
- for (Expr child: expr.getChildren()) {
- if (!canExtractCorrelatedPredicates(child, subqueryTupleIds)) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * Return true if the expr tree rooted at 'root' contains a correlated
- * predicate.
- */
- private static boolean containsCorrelatedPredicate(Expr root, List<TupleId> tupleIds) {
- if (isCorrelatedPredicate(root, tupleIds)) return true;
- for (Expr child: root.getChildren()) {
- if (containsCorrelatedPredicate(child, tupleIds)) return true;
- }
- return false;
- }
-
- /**
- * Returns true if 'expr' is a correlated predicate. A predicate is
- * correlated if at least one of its SlotRefs belongs to an ancestor
- * query block (i.e. is not bound by the given 'tupleIds').
- */
- private static boolean isCorrelatedPredicate(Expr expr, List<TupleId> tupleIds) {
- return (expr instanceof BinaryPredicate || expr instanceof SlotRef)
- && !expr.isBoundByTupleIds(tupleIds);
- }
-
- /**
- * Converts an expr containing a subquery into an analyzed conjunct to be
- * used in a join. The conversion is performed in place by replacing the
- * subquery with the first expr from the select list of 'inlineView'.
- * If 'isCorrelated' is true and the first expr from the inline view contains
- * an aggregate function that returns non-null on an empty input,
- * the aggregate function is wrapped into a 'zeroifnull' function.
- */
- private static Expr createJoinConjunct(Expr exprWithSubquery, InlineViewRef inlineView,
- Analyzer analyzer, boolean isCorrelated) throws AnalysisException {
- Preconditions.checkNotNull(exprWithSubquery);
- Preconditions.checkNotNull(inlineView);
- Preconditions.checkState(exprWithSubquery.contains(Subquery.class));
- if (exprWithSubquery instanceof ExistsPredicate) return null;
- // Create a SlotRef from the first item of inlineView's select list
- SlotRef slotRef = new SlotRef(Lists.newArrayList(
- inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
- slotRef.analyze(analyzer);
- Expr subquerySubstitute = slotRef;
- if (exprWithSubquery instanceof InPredicate) {
- BinaryPredicate pred = new BinaryPredicate(BinaryPredicate.Operator.EQ,
- exprWithSubquery.getChild(0), slotRef);
- pred.analyze(analyzer);
- return pred;
- }
- // Only scalar subqueries are supported
- Subquery subquery = exprWithSubquery.getSubquery();
- if (!subquery.isScalarSubquery()) {
- throw new AnalysisException("Unsupported predicate with a non-scalar subquery: "
- + subquery.toSql());
- }
- ExprSubstitutionMap smap = new ExprSubstitutionMap();
- SelectListItem item =
- ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
- if (isCorrelated && !item.getExpr().contains(Expr.IS_BUILTIN_AGG_FN)) {
- throw new AnalysisException("UDAs are not supported in the select list of " +
- "correlated subqueries: " + subquery.toSql());
- }
- if (isCorrelated && item.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
- // TODO: Add support for multiple agg functions that return non-null on an
- // empty input, by wrapping them with zeroifnull functions before the inline
- // view is analyzed.
- if (!Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr()) &&
- (!(item.getExpr() instanceof CastExpr) ||
- !Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr().getChild(0)))) {
- throw new AnalysisException("Aggregate function that returns non-null on " +
- "an empty input cannot be used in an expression in a " +
- "correlated subquery's select list: " + subquery.toSql());
- }
-
- List<Expr> aggFns = Lists.newArrayList();
- item.getExpr().collectAll(Expr.NON_NULL_EMPTY_AGG, aggFns);
- // TODO Generalize this by making the aggregate functions aware of the
- // literal expr that they return on empty input, e.g. max returns a
- // NullLiteral whereas count returns a NumericLiteral.
- if (((FunctionCallExpr)aggFns.get(0)).getReturnType().isNumericType()) {
- FunctionCallExpr zeroIfNull = new FunctionCallExpr("zeroifnull",
- Lists.newArrayList((Expr) slotRef));
- zeroIfNull.analyze(analyzer);
- subquerySubstitute = zeroIfNull;
- } else if (((FunctionCallExpr)aggFns.get(0)).getReturnType().isStringType()) {
- List<Expr> params = Lists.newArrayList();
- params.add(slotRef);
- params.add(new StringLiteral(""));
- FunctionCallExpr ifnull = new FunctionCallExpr("ifnull", params);
- ifnull.analyze(analyzer);
- subquerySubstitute = ifnull;
- } else {
- throw new AnalysisException("Unsupported aggregate function used in " +
- "a correlated subquery's select list: " + subquery.toSql());
- }
- }
- smap.put(subquery, subquerySubstitute);
- return exprWithSubquery.substitute(smap, analyzer, false);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/StringLiteral.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StringLiteral.java b/fe/src/main/java/com/cloudera/impala/analysis/StringLiteral.java
deleted file mode 100644
index be4820c..0000000
--- a/fe/src/main/java/com/cloudera/impala/analysis/StringLiteral.java
+++ /dev/null
@@ -1,174 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.analysis;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.math.BigDecimal;
-
-import java_cup.runtime.Symbol;
-
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-
-import com.cloudera.impala.catalog.ScalarType;
-import com.cloudera.impala.catalog.Type;
-import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.thrift.TExprNode;
-import com.cloudera.impala.thrift.TExprNodeType;
-import com.cloudera.impala.thrift.TStringLiteral;
-import com.google.common.base.Objects;
-import com.google.common.base.Preconditions;
-
-public class StringLiteral extends LiteralExpr {
- private final String value_;
-
- public StringLiteral(String value) {
- this.value_ = value;
- type_ = ScalarType.STRING;
- evalCost_ = LITERAL_COST;
- }
-
- public StringLiteral(String value, Type type) {
- this.value_ = value;
- type_ = type;
- evalCost_ = LITERAL_COST;
- }
-
- /**
- * Copy c'tor used in clone().
- */
- protected StringLiteral(StringLiteral other) {
- super(other);
- value_ = other.value_;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (!super.equals(obj)) return false;
- return ((StringLiteral) obj).value_.equals(value_);
- }
-
- @Override
- public int hashCode() { return value_.hashCode(); }
-
- @Override
- public String toSqlImpl() {
- return "'" + value_ + "'";
- }
-
- @Override
- protected void toThrift(TExprNode msg) {
- msg.node_type = TExprNodeType.STRING_LITERAL;
- msg.string_literal = new TStringLiteral(getUnescapedValue());
- }
-
- public String getValue() { return value_; }
-
- public String getUnescapedValue() {
- // Unescape string exactly like Hive does. Hive's method assumes
- // quotes so we add them here to reuse Hive's code.
- return BaseSemanticAnalyzer.unescapeSQLString("'" + value_ + "'");
- }
-
- @Override
- public String getStringValue() {
- return value_;
- }
-
- @Override
- public String debugString() {
- return Objects.toStringHelper(this)
- .add("value", value_)
- .toString();
- }
-
- @Override
- protected Expr uncheckedCastTo(Type targetType) throws AnalysisException {
- Preconditions.checkState(targetType.isNumericType() || targetType.isDateType()
- || targetType.equals(this.type_) || targetType.isStringType());
- if (targetType.equals(this.type_)) {
- return this;
- } else if (targetType.isStringType()) {
- type_ = targetType;
- } else if (targetType.isNumericType()) {
- return convertToNumber();
- } else if (targetType.isDateType()) {
- // Let the BE do the cast so it is in Boost format
- return new CastExpr(targetType, this);
- }
- return this;
- }
-
- /**
- * Convert this string literal to numeric literal.
- *
- * @return new converted literal (not null)
- * the type of the literal is determined by the lexical scanner
- * @throws AnalysisException
- * if NumberFormatException occurs,
- * or if floating point value is NaN or infinite
- */
- public LiteralExpr convertToNumber()
- throws AnalysisException {
- StringReader reader = new StringReader(value_);
- SqlScanner scanner = new SqlScanner(reader);
- // For distinguishing positive and negative numbers.
- boolean negative = false;
- Symbol sym;
- try {
- // We allow simple chaining of MINUS to recognize negative numbers.
- // Currently we can't handle string literals containing full fledged expressions
- // which are implicitly cast to a numeric literal.
- // This would require invoking the parser.
- sym = scanner.next_token();
- while (sym.sym == SqlParserSymbols.SUBTRACT) {
- negative = !negative;
- sym = scanner.next_token();
- }
- } catch (IOException e) {
- throw new AnalysisException("Failed to convert string literal to number.", e);
- }
- if (sym.sym == SqlParserSymbols.NUMERIC_OVERFLOW) {
- throw new AnalysisException("Number too large: " + value_);
- }
- if (sym.sym == SqlParserSymbols.INTEGER_LITERAL) {
- BigDecimal val = (BigDecimal) sym.value;
- if (negative) val = val.negate();
- return new NumericLiteral(val);
- }
- if (sym.sym == SqlParserSymbols.DECIMAL_LITERAL) {
- BigDecimal val = (BigDecimal) sym.value;
- if (negative) val = val.negate();
- return new NumericLiteral(val);
- }
- // Symbol is not an integer or floating point literal.
- throw new AnalysisException(
- "Failed to convert string literal '" + value_ + "' to number.");
- }
-
- @Override
- public int compareTo(LiteralExpr o) {
- int ret = super.compareTo(o);
- if (ret != 0) return ret;
- StringLiteral other = (StringLiteral) o;
- return value_.compareTo(other.getStringValue());
- }
-
- @Override
- public Expr clone() { return new StringLiteral(this); }
-}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/Subquery.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Subquery.java b/fe/src/main/java/com/cloudera/impala/analysis/Subquery.java
deleted file mode 100644
index d0e1b30..0000000
--- a/fe/src/main/java/com/cloudera/impala/analysis/Subquery.java
+++ /dev/null
@@ -1,157 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.analysis;
-
-import java.util.ArrayList;
-
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.cloudera.impala.catalog.ArrayType;
-import com.cloudera.impala.catalog.StructField;
-import com.cloudera.impala.catalog.StructType;
-import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.thrift.TExprNode;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- * Class representing a subquery. A Subquery consists of a QueryStmt and has
- * its own Analyzer context.
- */
-public class Subquery extends Expr {
- private final static Logger LOG = LoggerFactory.getLogger(Subquery.class);
-
- // The QueryStmt of the subquery.
- protected QueryStmt stmt_;
- // A subquery has its own analysis context
- protected Analyzer analyzer_;
-
- public Analyzer getAnalyzer() { return analyzer_; }
- public QueryStmt getStatement() { return stmt_; }
- @Override
- public String toSqlImpl() { return "(" + stmt_.toSql() + ")"; }
-
- /**
- * C'tor that initializes a Subquery from a QueryStmt.
- */
- public Subquery(QueryStmt queryStmt) {
- super();
- Preconditions.checkNotNull(queryStmt);
- stmt_ = queryStmt;
- }
-
- /**
- * Copy c'tor.
- */
- public Subquery(Subquery other) {
- super(other);
- stmt_ = other.stmt_.clone();
- analyzer_ = other.analyzer_;
- }
-
- /**
- * Analyzes the subquery in a child analyzer.
- */
- @Override
- public void analyze(Analyzer analyzer) throws AnalysisException {
- if (isAnalyzed_) return;
- super.analyze(analyzer);
- if (!(stmt_ instanceof SelectStmt)) {
- throw new AnalysisException("A subquery must contain a single select block: " +
- toSql());
- }
- // The subquery is analyzed with its own analyzer.
- analyzer_ = new Analyzer(analyzer);
- analyzer_.setIsSubquery();
- stmt_.analyze(analyzer_);
- // Check whether the stmt_ contains an illegal mix of un/correlated table refs.
- stmt_.getCorrelatedTupleIds(analyzer_);
-
- // Set the subquery type based on the types of the exprs in the
- // result list of the associated SelectStmt.
- ArrayList<Expr> stmtResultExprs = stmt_.getResultExprs();
- if (stmtResultExprs.size() == 1) {
- type_ = stmtResultExprs.get(0).getType();
- Preconditions.checkState(!type_.isComplexType());
- } else {
- type_ = createStructTypeFromExprList();
- }
-
- // If the subquery returns many rows, set its type to ArrayType.
- if (!((SelectStmt)stmt_).returnsSingleRow()) type_ = new ArrayType(type_);
-
- Preconditions.checkNotNull(type_);
- isAnalyzed_ = true;
- }
-
- @Override
- public boolean isConstant() { return false; }
-
- /**
- * Check if the subquery's SelectStmt returns a single column of scalar type.
- */
- public boolean returnsScalarColumn() {
- ArrayList<Expr> stmtResultExprs = stmt_.getResultExprs();
- if (stmtResultExprs.size() == 1 && stmtResultExprs.get(0).getType().isScalarType()) {
- return true;
- }
- return false;
- }
-
- /**
- * Create a StrucType from the result expr list of a subquery's SelectStmt.
- */
- private StructType createStructTypeFromExprList() {
- ArrayList<Expr> stmtResultExprs = stmt_.getResultExprs();
- ArrayList<StructField> structFields = Lists.newArrayList();
- // Check if we have unique labels
- ArrayList<String> labels = stmt_.getColLabels();
- boolean hasUniqueLabels = true;
- if (Sets.newHashSet(labels).size() != labels.size()) hasUniqueLabels = false;
-
- // Construct a StructField from each expr in the select list
- for (int i = 0; i < stmtResultExprs.size(); ++i) {
- Expr expr = stmtResultExprs.get(i);
- String fieldName = null;
- // Check if the label meets the Metastore's requirements.
- if (MetaStoreUtils.validateName(labels.get(i))) {
- fieldName = labels.get(i);
- // Make sure the field names are unique.
- if (!hasUniqueLabels) {
- fieldName = "_" + Integer.toString(i) + "_" + fieldName;
- }
- } else {
- // Use the expr ordinal to construct a StructField.
- fieldName = "_" + Integer.toString(i);
- }
- Preconditions.checkNotNull(fieldName);
- structFields.add(new StructField(fieldName, expr.getType(), null));
- }
- Preconditions.checkState(structFields.size() != 0);
- return new StructType(structFields);
- }
-
- @Override
- public Subquery clone() { return new Subquery(this); }
-
- @Override
- protected void toThrift(TExprNode msg) {}
-}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TableName.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableName.java b/fe/src/main/java/com/cloudera/impala/analysis/TableName.java
deleted file mode 100644
index 5fc8f7e..0000000
--- a/fe/src/main/java/com/cloudera/impala/analysis/TableName.java
+++ /dev/null
@@ -1,123 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.analysis;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-
-import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.thrift.TTableName;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-/**
- * Represents a table/view name that optionally includes its database (a fully qualified
- * table name). Analysis of this table name checks for validity of the database and
- * table name according to the Metastore's policy (see @MetaStoreUtils).
- * According to that definition, we can still use "invalid" table names for tables/views
- * that are not stored in the Metastore, e.g., for Inline Views or WITH-clause views.
- */
-public class TableName {
- private final String db_;
- private final String tbl_;
-
- public TableName(String db, String tbl) {
- super();
- Preconditions.checkArgument(db == null || !db.isEmpty());
- this.db_ = db;
- Preconditions.checkNotNull(tbl);
- this.tbl_ = tbl;
- }
-
- public String getDb() { return db_; }
- public String getTbl() { return tbl_; }
- public boolean isEmpty() { return tbl_.isEmpty(); }
-
- /**
- * Checks whether the db and table name meet the Metastore's requirements.
- */
- public void analyze() throws AnalysisException {
- if (db_ != null) {
- if (!MetaStoreUtils.validateName(db_)) {
- throw new AnalysisException("Invalid database name: " + db_);
- }
- }
- Preconditions.checkNotNull(tbl_);
- if (!MetaStoreUtils.validateName(tbl_)) {
- throw new AnalysisException("Invalid table/view name: " + tbl_);
- }
- }
-
- /**
- * Returns true if this name has a non-empty database field and a non-empty
- * table name.
- */
- public boolean isFullyQualified() {
- return db_ != null && !db_.isEmpty() && !tbl_.isEmpty();
- }
-
- public String toSql() {
- // Enclose the database and/or table name in quotes if Hive cannot parse them
- // without quotes. This is needed for view compatibility between Impala and Hive.
- if (db_ == null) {
- return ToSqlUtils.getIdentSql(tbl_);
- } else {
- return ToSqlUtils.getIdentSql(db_) + "." + ToSqlUtils.getIdentSql(tbl_);
- }
- }
-
- @Override
- public String toString() {
- if (db_ == null) {
- return tbl_;
- } else {
- return db_ + "." + tbl_;
- }
- }
-
- public List<String> toPath() {
- List<String> result = Lists.newArrayListWithCapacity(2);
- if (db_ != null) result.add(db_);
- result.add(tbl_);
- return result;
- }
-
- public static TableName fromThrift(TTableName tableName) {
- return new TableName(tableName.getDb_name(), tableName.getTable_name());
- }
-
- public TTableName toThrift() { return new TTableName(db_, tbl_); }
-
- /**
- * Returns true of the table names are considered equals. To check for equality,
- * a case-insensitive comparison of the database and table name is performed.
- */
- @Override
- public boolean equals(Object anObject) {
- if (anObject instanceof TableName) {
- return toString().toLowerCase().equals(anObject.toString().toLowerCase());
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- return toString().toLowerCase().hashCode();
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
deleted file mode 100644
index 0ff0575..0000000
--- a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
+++ /dev/null
@@ -1,587 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.analysis;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Set;
-
-import com.cloudera.impala.catalog.HdfsTable;
-import com.cloudera.impala.catalog.Table;
-import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.planner.JoinNode.DistributionMode;
-import com.cloudera.impala.thrift.TReplicaPreference;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- * Superclass of all table references, including references to views, base tables
- * (Hdfs, HBase or DataSource tables), and nested collections. Contains the join
- * specification. An instance of a TableRef (and not a subclass thereof) represents
- * an unresolved table reference that must be resolved during analysis. All resolved
- * table references are subclasses of TableRef.
- *
- * The analysis of table refs follows a two-step process:
- *
- * 1. Resolution: A table ref's path is resolved and then the generic TableRef is
- * replaced by a concrete table ref (a BaseTableRef, CollectionTabeRef or ViewRef)
- * in the originating stmt and that is given the resolved path. This step is driven by
- * Analyzer.resolveTableRef().
- *
- * 2. Analysis/registration: After resolution, the concrete table ref is analyzed
- * to register a tuple descriptor for its resolved path and register other table-ref
- * specific state with the analyzer (e.g., whether it is outer/semi joined, etc.).
- *
- * Therefore, subclasses of TableRef should never call the analyze() of its superclass.
- *
- * TODO for 2.3: The current TableRef class hierarchy and the related two-phase analysis
- * feels convoluted and is hard to follow. We should reorganize the TableRef class
- * structure for clarity of analysis and avoid a table ref 'switching genders' in between
- * resolution and registration.
- *
- * TODO for 2.3: Rename this class to CollectionRef and re-consider the naming and
- * structure of all subclasses.
- */
-public class TableRef implements ParseNode {
- // Path to a collection type. Not set for inline views.
- protected List<String> rawPath_;
-
- // Legal aliases of this table ref. Contains the explicit alias as its sole element if
- // there is one. Otherwise, contains the two implicit aliases. Implicit aliases are set
- // in the c'tor of the corresponding resolved table ref (subclasses of TableRef) during
- // analysis. By convention, for table refs with multiple implicit aliases, aliases_[0]
- // contains the fully-qualified implicit alias to ensure that aliases_[0] always
- // uniquely identifies this table ref regardless of whether it has an explicit alias.
- protected String[] aliases_;
-
- // Indicates whether this table ref is given an explicit alias,
- protected boolean hasExplicitAlias_;
-
- protected JoinOperator joinOp_;
- protected ArrayList<String> joinHints_;
- protected List<String> usingColNames_;
-
- protected ArrayList<String> tableHints_;
- protected TReplicaPreference replicaPreference_;
- protected boolean randomReplica_;
-
- // Hinted distribution mode for this table ref; set after analyzeJoinHints()
- // TODO: Move join-specific members out of TableRef.
- private DistributionMode distrMode_ = DistributionMode.NONE;
-
- /////////////////////////////////////////
- // BEGIN: Members that need to be reset()
-
- // Resolution of rawPath_ if applicable. Result of analysis.
- protected Path resolvedPath_;
-
- protected Expr onClause_;
-
- // the ref to the left of us, if we're part of a JOIN clause
- protected TableRef leftTblRef_;
-
- // true if this TableRef has been analyzed; implementing subclass should set it to true
- // at the end of analyze() call.
- protected boolean isAnalyzed_;
-
- // Lists of table ref ids and materialized tuple ids of the full sequence of table
- // refs up to and including this one. These ids are cached during analysis because
- // we may alter the chain of table refs during plan generation, but we still rely
- // on the original list of ids for correct predicate assignment.
- // Populated in analyzeJoin().
- protected List<TupleId> allTableRefIds_ = Lists.newArrayList();
- protected List<TupleId> allMaterializedTupleIds_ = Lists.newArrayList();
-
- // All physical tuple ids that this table ref is correlated with:
- // Tuple ids of root descriptors from outer query blocks that this table ref
- // (if a CollectionTableRef) or contained CollectionTableRefs (if an InlineViewRef)
- // are rooted at. Populated during analysis.
- protected List<TupleId> correlatedTupleIds_ = Lists.newArrayList();
-
- // analysis output
- protected TupleDescriptor desc_;
-
- // END: Members that need to be reset()
- /////////////////////////////////////////
-
- public TableRef(List<String> path, String alias) {
- super();
- rawPath_ = path;
- if (alias != null) {
- aliases_ = new String[] { alias.toLowerCase() };
- hasExplicitAlias_ = true;
- } else {
- hasExplicitAlias_ = false;
- }
- isAnalyzed_ = false;
- replicaPreference_ = null;
- randomReplica_ = false;
- }
-
- /**
- * C'tor for cloning.
- */
- protected TableRef(TableRef other) {
- rawPath_ = other.rawPath_;
- resolvedPath_ = other.resolvedPath_;
- aliases_ = other.aliases_;
- hasExplicitAlias_ = other.hasExplicitAlias_;
- joinOp_ = other.joinOp_;
- joinHints_ =
- (other.joinHints_ != null) ? Lists.newArrayList(other.joinHints_) : null;
- onClause_ = (other.onClause_ != null) ? other.onClause_.clone() : null;
- usingColNames_ =
- (other.usingColNames_ != null) ? Lists.newArrayList(other.usingColNames_) : null;
- tableHints_ =
- (other.tableHints_ != null) ? Lists.newArrayList(other.tableHints_) : null;
- replicaPreference_ = other.replicaPreference_;
- randomReplica_ = other.randomReplica_;
- distrMode_ = other.distrMode_;
- // The table ref links are created at the statement level, so cloning a set of linked
- // table refs is the responsibility of the statement.
- leftTblRef_ = null;
- isAnalyzed_ = other.isAnalyzed_;
- allTableRefIds_ = Lists.newArrayList(other.allTableRefIds_);
- allMaterializedTupleIds_ = Lists.newArrayList(other.allMaterializedTupleIds_);
- correlatedTupleIds_ = Lists.newArrayList(other.correlatedTupleIds_);
- desc_ = other.desc_;
- }
-
- @Override
- public void analyze(Analyzer analyzer) throws AnalysisException {
- throw new IllegalStateException(
- "Should not call analyze() on an unresolved TableRef.");
- }
-
- /**
- * Creates and returns a empty TupleDescriptor registered with the analyzer
- * based on the resolvedPath_.
- * This method is called from the analyzer when registering this table reference.
- */
- public TupleDescriptor createTupleDescriptor(Analyzer analyzer)
- throws AnalysisException {
- TupleDescriptor result = analyzer.getDescTbl().createTupleDescriptor(
- getClass().getSimpleName() + " " + getUniqueAlias());
- result.setPath(resolvedPath_);
- return result;
- }
-
- /**
- * Set this table's context-dependent join attributes from the given table.
- * Does not clone the attributes.
- */
- protected void setJoinAttrs(TableRef other) {
- this.joinOp_ = other.joinOp_;
- this.joinHints_ = other.joinHints_;
- this.tableHints_ = other.tableHints_;
- this.onClause_ = other.onClause_;
- this.usingColNames_ = other.usingColNames_;
- }
-
- public JoinOperator getJoinOp() {
-   if (joinOp_ != null) return joinOp_;
-   // No explicit join operator means an inner join.
-   return JoinOperator.INNER_JOIN;
- }
-
- public TReplicaPreference getReplicaPreference() { return replicaPreference_; }
- public boolean getRandomReplica() { return randomReplica_; }
-
- /**
- * Returns true if this table ref has a resolved path that is rooted at a registered
- * tuple descriptor, false otherwise.
- */
- public boolean isRelative() { return false; }
-
- /**
- * Indicates if this TableRef directly or indirectly references another TableRef from
- * an outer query block.
- */
- public boolean isCorrelated() { return !correlatedTupleIds_.isEmpty(); }
-
- public List<String> getPath() { return rawPath_; }
- public Path getResolvedPath() { return resolvedPath_; }
-
- /**
- * Returns all legal aliases of this table ref.
- */
- public String[] getAliases() { return aliases_; }
-
- /**
- * Returns the explicit alias or the fully-qualified implicit alias. The returned alias
- * is guaranteed to be unique (i.e., column/field references against the alias cannot
- * be ambiguous).
- */
- public String getUniqueAlias() { return aliases_[0]; }
-
- /**
- * Returns true if this table ref has an explicit alias.
- * Note that getAliases().length() == 1 does not imply an explicit alias because
- * nested collection refs have only a single implicit alias.
- */
- public boolean hasExplicitAlias() { return hasExplicitAlias_; }
-
- /**
-  * Returns the unique alias if it was given explicitly, and null if this table ref
-  * has no explicit alias.
-  */
- public String getExplicitAlias() {
-   return hasExplicitAlias() ? getUniqueAlias() : null;
- }
-
- /**
-  * Returns the root table of the resolved path.
-  * Throws a NullPointerException if resolvedPath_ is null.
-  */
- public Table getTable() {
-   return Preconditions.checkNotNull(resolvedPath_).getRootTable();
- }
- public ArrayList<String> getJoinHints() { return joinHints_; }
- public ArrayList<String> getTableHints() { return tableHints_; }
- public Expr getOnClause() { return onClause_; }
- public List<String> getUsingClause() { return usingColNames_; }
- public void setJoinOp(JoinOperator op) { this.joinOp_ = op; }
- public void setOnClause(Expr e) { this.onClause_ = e; }
- public void setUsingClause(List<String> colNames) { this.usingColNames_ = colNames; }
- public TableRef getLeftTblRef() { return leftTblRef_; }
- public void setLeftTblRef(TableRef leftTblRef) { this.leftTblRef_ = leftTblRef; }
- public void setJoinHints(ArrayList<String> hints) { this.joinHints_ = hints; }
- public void setTableHints(ArrayList<String> hints) { this.tableHints_ = hints; }
- public boolean isBroadcastJoin() { return distrMode_ == DistributionMode.BROADCAST; }
- public boolean isPartitionedJoin() {
- return distrMode_ == DistributionMode.PARTITIONED;
- }
- public DistributionMode getDistributionMode() { return distrMode_; }
- public List<TupleId> getCorrelatedTupleIds() { return correlatedTupleIds_; }
- public boolean isAnalyzed() { return isAnalyzed_; }
- public boolean isResolved() { return !getClass().equals(TableRef.class); }
-
- /**
- * This method should only be called after the TableRef has been analyzed.
- */
- public TupleDescriptor getDesc() {
- Preconditions.checkState(isAnalyzed_);
- // after analyze(), desc should be set.
- Preconditions.checkState(desc_ != null);
- return desc_;
- }
-
- /**
- * This method should only be called after the TableRef has been analyzed.
- */
- public TupleId getId() {
- Preconditions.checkState(isAnalyzed_);
- // after analyze(), desc should be set.
- Preconditions.checkNotNull(desc_);
- return desc_.getId();
- }
-
- public List<TupleId> getMaterializedTupleIds() {
- // This function should only be called after analyze().
- Preconditions.checkState(isAnalyzed_);
- Preconditions.checkNotNull(desc_);
- return desc_.getId().asList();
- }
-
- /**
- * Returns the list of tuple ids materialized by the full sequence of
- * table refs up to and including this one.
- */
- public List<TupleId> getAllMaterializedTupleIds() {
- Preconditions.checkState(isAnalyzed_);
- return allMaterializedTupleIds_;
- }
-
- /**
- * Return the list of table ref ids of the full sequence of table refs up to
- * and including this one.
- */
- public List<TupleId> getAllTableRefIds() {
- Preconditions.checkState(isAnalyzed_);
- return allTableRefIds_;
- }
-
- /**
-  * Analyzes the table hints and then the join hints of this resolved table ref.
-  */
- protected void analyzeHints(Analyzer analyzer) throws AnalysisException {
-   // Hints are only analyzed on resolved table refs.
-   Preconditions.checkState(isResolved());
-   // To maintain view compatibility with Hive, warnings are preferred over
-   // exceptions here.
-   analyzeTableHints(analyzer);
-   analyzeJoinHints(analyzer);
- }
-
- /**
-  * Applies the table hints to replicaPreference_ and randomReplica_. Hints on table
-  * refs other than BaseTableRef are ignored with a warning, and unrecognized hints
-  * only produce a warning.
-  */
- private void analyzeTableHints(Analyzer analyzer) {
-   if (tableHints_ == null) return;
-   boolean isBaseTableRef = this instanceof BaseTableRef;
-   if (!isBaseTableRef) {
-     analyzer.addWarning("Table hints not supported for inline view and collections");
-     return;
-   }
-   // BaseTableRef will always have their path resolved at this point.
-   Preconditions.checkState(getResolvedPath() != null);
-   // Warn about non-Hdfs destination tables, but still process the hints below.
-   if (!(getResolvedPath().destTable() == null
-       || getResolvedPath().destTable() instanceof HdfsTable)) {
-     analyzer.addWarning("Table hints only supported for Hdfs tables");
-   }
-   for (String tableHint: tableHints_) {
-     // Scheduling hints that select a replica preference.
-     TReplicaPreference hintedPreference = null;
-     if (tableHint.equalsIgnoreCase("SCHEDULE_CACHE_LOCAL")) {
-       hintedPreference = TReplicaPreference.CACHE_LOCAL;
-     } else if (tableHint.equalsIgnoreCase("SCHEDULE_DISK_LOCAL")) {
-       hintedPreference = TReplicaPreference.DISK_LOCAL;
-     } else if (tableHint.equalsIgnoreCase("SCHEDULE_REMOTE")) {
-       hintedPreference = TReplicaPreference.REMOTE;
-     }
-     if (hintedPreference != null) {
-       analyzer.setHasPlanHints();
-       replicaPreference_ = hintedPreference;
-       continue;
-     }
-     if (tableHint.equalsIgnoreCase("SCHEDULE_RANDOM_REPLICA")) {
-       analyzer.setHasPlanHints();
-       randomReplica_ = true;
-       continue;
-     }
-     // Anything else is reported but otherwise ignored.
-     Preconditions.checkState(getAliases() != null && getAliases().length > 0);
-     analyzer.addWarning("Table hint not recognized for table " + getUniqueAlias() +
-         ": " + tableHint);
-   }
- }
-
- /**
-  * Applies the join hints to distrMode_. Throws an AnalysisException if a hint is not
-  * supported by the join operator or conflicts with the distribution mode that is
-  * already set. Unrecognized hints only produce a warning.
-  */
- private void analyzeJoinHints(Analyzer analyzer) throws AnalysisException {
-   if (joinHints_ == null) return;
-   for (String joinHint: joinHints_) {
-     boolean wantsBroadcast = joinHint.equalsIgnoreCase("BROADCAST");
-     boolean wantsShuffle = !wantsBroadcast && joinHint.equalsIgnoreCase("SHUFFLE");
-     if (!wantsBroadcast && !wantsShuffle) {
-       analyzer.addWarning("JOIN hint not recognized: " + joinHint);
-       continue;
-     }
-
-     if (wantsBroadcast) {
-       boolean broadcastUnsupported = joinOp_ == JoinOperator.RIGHT_OUTER_JOIN
-           || joinOp_ == JoinOperator.FULL_OUTER_JOIN
-           || joinOp_ == JoinOperator.RIGHT_SEMI_JOIN
-           || joinOp_ == JoinOperator.RIGHT_ANTI_JOIN;
-       if (broadcastUnsupported) {
-         throw new AnalysisException(
-             joinOp_.toString() + " does not support BROADCAST.");
-       }
-       if (isPartitionedJoin()) {
-         throw new AnalysisException("Conflicting JOIN hint: " + joinHint);
-       }
-       distrMode_ = DistributionMode.BROADCAST;
-     } else {
-       if (joinOp_ == JoinOperator.CROSS_JOIN) {
-         throw new AnalysisException("CROSS JOIN does not support SHUFFLE.");
-       }
-       if (isBroadcastJoin()) {
-         throw new AnalysisException("Conflicting JOIN hint: " + joinHint);
-       }
-       distrMode_ = DistributionMode.PARTITIONED;
-     }
-     analyzer.setHasPlanHints();
-   }
- }
-
- /**
- * Analyzes the join clause. Populates allTableRefIds_ and allMaterializedTupleIds_.
- * The join clause can only be analyzed after the left table has been analyzed
- * and the TupleDescriptor (desc) of this table has been created.
- *
- * In order, this method: rebuilds the accumulated id lists, forces BROADCAST
- * distribution for CROSS JOINs, rewrites a USING clause into an equivalent ON
- * clause, registers outer-, full-outer- and semi-joined tuple ids with the
- * analyzer, and finally analyzes the ON clause and registers its conjuncts.
- *
- * Throws an AnalysisException if a USING column cannot be resolved against the left
- * or the right table, if the ON clause contains an aggregate function or an
- * analytic expression, or if an outer or semi join that is neither relative nor
- * correlated has no ON or USING clause.
- */
- public void analyzeJoin(Analyzer analyzer) throws AnalysisException {
- Preconditions.checkState(leftTblRef_ == null || leftTblRef_.isAnalyzed_);
- Preconditions.checkState(desc_ != null);
-
- // Populate the lists of all table ref and materialized tuple ids.
- allTableRefIds_.clear();
- allMaterializedTupleIds_.clear();
- if (leftTblRef_ != null) {
- allTableRefIds_.addAll(leftTblRef_.getAllTableRefIds());
- allMaterializedTupleIds_.addAll(leftTblRef_.getAllMaterializedTupleIds());
- }
- allTableRefIds_.add(getId());
- allMaterializedTupleIds_.addAll(getMaterializedTupleIds());
-
- if (joinOp_ == JoinOperator.CROSS_JOIN) {
- // A CROSS JOIN is always a broadcast join, regardless of the join hints
- distrMode_ = DistributionMode.BROADCAST;
- }
-
- if (usingColNames_ != null) {
- Preconditions.checkState(joinOp_ != JoinOperator.CROSS_JOIN);
- // Turn USING clause into equivalent ON clause, discarding any existing onClause_.
- onClause_ = null;
- for (String colName: usingColNames_) {
- // check whether colName exists both for our table and the one
- // to the left of us; the column name is lower-cased for the lookup
- Path leftColPath = new Path(leftTblRef_.getDesc(),
- Lists.newArrayList(colName.toLowerCase()));
- if (!leftColPath.resolve()) {
- throw new AnalysisException(
- "unknown column " + colName + " for alias "
- + leftTblRef_.getUniqueAlias() + " (in \"" + this.toSql() + "\")");
- }
- Path rightColPath = new Path(desc_,
- Lists.newArrayList(colName.toLowerCase()));
- if (!rightColPath.resolve()) {
- throw new AnalysisException(
- "unknown column " + colName + " for alias "
- + getUniqueAlias() + " (in \"" + this.toSql() + "\")");
- }
-
- // create predicate "<left>.colName = <right>.colName"
- BinaryPredicate eqPred =
- new BinaryPredicate(BinaryPredicate.Operator.EQ,
- new SlotRef(Path.createRawPath(leftTblRef_.getUniqueAlias(), colName)),
- new SlotRef(Path.createRawPath(getUniqueAlias(), colName)));
- onClause_ = CompoundPredicate.createConjunction(eqPred, onClause_);
- }
- }
-
- // at this point, both 'this' and leftTblRef have been analyzed and registered;
- // register the tuple ids of the TableRefs on the nullable side of an outer join
- if (joinOp_ == JoinOperator.LEFT_OUTER_JOIN
- || joinOp_ == JoinOperator.FULL_OUTER_JOIN) {
- analyzer.registerOuterJoinedTids(getId().asList(), this);
- }
- if (joinOp_ == JoinOperator.RIGHT_OUTER_JOIN
- || joinOp_ == JoinOperator.FULL_OUTER_JOIN) {
- analyzer.registerOuterJoinedTids(leftTblRef_.getAllTableRefIds(), this);
- }
- // register the tuple ids of a full outer join
- if (joinOp_ == JoinOperator.FULL_OUTER_JOIN) {
- analyzer.registerFullOuterJoinedTids(leftTblRef_.getAllTableRefIds(), this);
- analyzer.registerFullOuterJoinedTids(getId().asList(), this);
- }
- // register the tuple id of the rhs of a left semi/anti join
- TupleId semiJoinedTupleId = null;
- if (joinOp_ == JoinOperator.LEFT_SEMI_JOIN
- || joinOp_ == JoinOperator.LEFT_ANTI_JOIN
- || joinOp_ == JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN) {
- analyzer.registerSemiJoinedTid(getId(), this);
- semiJoinedTupleId = getId();
- }
- // register the tuple id of the lhs of a right semi/anti join
- if (joinOp_ == JoinOperator.RIGHT_SEMI_JOIN
- || joinOp_ == JoinOperator.RIGHT_ANTI_JOIN) {
- analyzer.registerSemiJoinedTid(leftTblRef_.getId(), this);
- semiJoinedTupleId = leftTblRef_.getId();
- }
-
- if (onClause_ != null) {
- Preconditions.checkState(joinOp_ != JoinOperator.CROSS_JOIN);
- analyzer.setVisibleSemiJoinedTuple(semiJoinedTupleId); // null unless semi/anti join
- onClause_.analyze(analyzer);
- analyzer.setVisibleSemiJoinedTuple(null);
- onClause_.checkReturnsBool("ON clause", true);
- if (onClause_.contains(Expr.isAggregatePredicate())) {
- throw new AnalysisException(
- "aggregate function not allowed in ON clause: " + toSql());
- }
- if (onClause_.contains(AnalyticExpr.class)) {
- throw new AnalysisException(
- "analytic expression not allowed in ON clause: " + toSql());
- }
- Set<TupleId> onClauseTupleIds = Sets.newHashSet(); // NOTE(review): filled, never read
- List<Expr> conjuncts = onClause_.getConjuncts();
- // Outer join clause conjuncts are registered for this particular table ref
- // (ie, can only be evaluated by the plan node that implements this join).
- // The exceptions are conjuncts that only pertain to the nullable side
- // of the outer join; those can be evaluated directly when materializing tuples
- // without violating outer join semantics.
- analyzer.registerOnClauseConjuncts(conjuncts, this);
- for (Expr e: conjuncts) {
- List<TupleId> tupleIds = Lists.newArrayList();
- e.getIds(tupleIds, null);
- onClauseTupleIds.addAll(tupleIds);
- }
- } else if (!isRelative() && !isCorrelated()
- && (getJoinOp().isOuterJoin() || getJoinOp().isSemiJoin())) {
- throw new AnalysisException(
- joinOp_.toString() + " requires an ON or USING clause.");
- } else {
- // Indicate that this table ref has an empty ON-clause.
- analyzer.registerOnClauseConjuncts(Collections.<Expr>emptyList(), this);
- }
- }
-
- /**
-  * Returns the SQL for the table path, followed by a space and the explicit alias if
-  * there is one. The fully-qualified resolved path is used when available, and the
-  * raw path otherwise.
-  */
- protected String tableRefToSql() {
-   String alias = getExplicitAlias();
-   String aliasSql = (alias == null) ? null : ToSqlUtils.getIdentSql(alias);
-   List<String> path =
-       (resolvedPath_ == null) ? rawPath_ : resolvedPath_.getFullyQualifiedRawPath();
-   StringBuilder sql = new StringBuilder();
-   sql.append(ToSqlUtils.getPathSql(path));
-   if (aliasSql != null) sql.append(" ").append(aliasSql);
-   return sql.toString();
- }
-
- /**
-  * Returns the SQL of this table ref. Without a join operator the result is the table
-  * ref SQL, prefixed with ", " if there is a left table ref. Otherwise it is the join
-  * operator, the join hints (if any), the table ref SQL and the USING or ON clause.
-  */
- @Override
- public String toSql() {
-   if (joinOp_ == null) {
-     // Part of a comma-separated sequence of table refs w/o an explicit JOIN clause:
-     // every table ref except the first is preceded by ",".
-     String separator = (leftTblRef_ == null) ? "" : ", ";
-     return separator + tableRefToSql();
-   }
-
-   StringBuilder sql = new StringBuilder();
-   sql.append(" ").append(joinOp_.toString()).append(" ");
-   if (joinHints_ != null) {
-     sql.append(ToSqlUtils.getPlanHintsSql(joinHints_)).append(" ");
-   }
-   sql.append(tableRefToSql());
-   // The USING clause takes precedence over the ON clause.
-   if (usingColNames_ != null) {
-     String usingCols = Joiner.on(", ").join(usingColNames_);
-     sql.append(" USING (").append(usingCols).append(")");
-   } else if (onClause_ != null) {
-     sql.append(" ON ").append(onClause_.toSql());
-   }
-   return sql.toString();
- }
-
- /**
-  * Returns a deep clone of this table ref, created with the copy constructor. The
-  * chain of table refs is not cloned: leftTblRef_ of the returned clone is null.
-  */
- @Override
- protected TableRef clone() {
-   TableRef copy = new TableRef(this);
-   return copy;
- }
-
- /**
- * Deep copies the given list of table refs and returns the clones in a new list.
- * The linking structure in the original table refs is preserved in the clones,
- * i.e., if the table refs were originally linked, then the corresponding clones
- * are linked in the same way. Similarly, if the original table refs were not linked
- * then the clones are also not linked.
- * Assumes that the given table refs are self-contained with respect to linking, i.e.,
- * that no table ref links to another table ref not in the list.
- */
- public static List<TableRef> cloneTableRefList(List<TableRef> tblRefs) {
- List<TableRef> clonedTblRefs = Lists.newArrayListWithCapacity(tblRefs.size());
- TableRef leftTblRef = null;
- for (TableRef tblRef: tblRefs) {
- TableRef tblRefClone = tblRef.clone();
- clonedTblRefs.add(tblRefClone);
- if (tblRef.leftTblRef_ != null) {
- Preconditions.checkState(tblRefs.contains(tblRef.leftTblRef_));
- tblRefClone.leftTblRef_ = leftTblRef;
- }
- leftTblRef = tblRefClone;
- }
- return clonedTblRefs;
- }
-
- /**
-  * Clears the analysis state of this table ref: the analyzed flag, the resolved path,
-  * the tuple descriptor, the left table ref link and the collected id lists.
-  * The ON clause is dropped if there is a USING clause, and reset otherwise.
-  */
- public void reset() {
-   resolvedPath_ = null;
-   isAnalyzed_ = false;
-   if (usingColNames_ == null) {
-     if (onClause_ != null) onClause_.reset();
-   } else {
-     // The using col names are converted into an on-clause predicate during analysis,
-     // so unset the on-clause here.
-     onClause_ = null;
-   }
-   desc_ = null;
-   leftTblRef_ = null;
-   correlatedTupleIds_.clear();
-   allMaterializedTupleIds_.clear();
-   allTableRefIds_.clear();
- }
-}