Posted to commits@hive.apache.org by gu...@apache.org on 2017/04/20 17:18:53 UTC
[2/5] hive git commit: HIVE-16423: Add hint to enforce semi join optimization (Deepak Jaiswal, reviewed by Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/9d5d737d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
new file mode 100644
index 0000000..b5a5645
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
@@ -0,0 +1,13508 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.security.AccessControlException;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.UUID;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.antlr.runtime.ClassicToken;
+import org.antlr.runtime.CommonToken;
+import org.antlr.runtime.Token;
+import org.antlr.runtime.tree.Tree;
+import org.antlr.runtime.tree.TreeVisitor;
+import org.antlr.runtime.tree.TreeVisitorAction;
+import org.antlr.runtime.tree.TreeWizard;
+import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
+import org.apache.calcite.rel.RelNode;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
+import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.QueryProperties;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RecordReader;
+import org.apache.hadoop.hive.ql.exec.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
+import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.Transform;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
+import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
+import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
+import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
+import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
+import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
+import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
+import org.apache.hadoop.hive.ql.plan.DDLWork;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
+import org.apache.hadoop.hive.ql.plan.ForwardDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.InsertTableDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
+import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
+import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.ScriptDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.util.ResourceDownloader;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
+import org.apache.hadoop.hive.serde2.NullStructSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import com.google.common.base.Splitter;
+import com.google.common.base.Strings;
+import com.google.common.collect.Sets;
+import com.google.common.math.IntMath;
+
+/**
+ * Implementation of the semantic analyzer. It generates the query plan.
+ * There are other specific semantic analyzers for some hive operations such as
+ * DDLSemanticAnalyzer for ddl operations.
+ */
+
+public class SemanticAnalyzer extends BaseSemanticAnalyzer {
+
+ public static final String DUMMY_DATABASE = "_dummy_database";
+ public static final String DUMMY_TABLE = "_dummy_table";
+ public static final String SUBQUERY_TAG_1 = "-subquery1";
+ public static final String SUBQUERY_TAG_2 = "-subquery2";
+
+ // Maximum number of characters when auto-generating a column alias that includes the function name
+ private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
+
+ public static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
+
+ static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
+
+ private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
+ private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
+ protected HashMap<String, TableScanOperator> topOps;
+ protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
+ private List<LoadTableDesc> loadTableWork;
+ private List<LoadFileDesc> loadFileWork;
+ private List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts;
+ private final Map<JoinOperator, QBJoinTree> joinContext;
+ private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
+ private final HashMap<TableScanOperator, Table> topToTable;
+ private final Map<FileSinkOperator, Table> fsopToTable;
+ private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ private final HashMap<TableScanOperator, Map<String, String>> topToTableProps;
+ private QB qb;
+ private ASTNode ast;
+ private int destTableId;
+ private UnionProcContext uCtx;
+ List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
+ private HashMap<TableScanOperator, SampleDesc> opToSamplePruner;
+ private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
+ private Map<SelectOperator, Table> viewProjectToTableSchema;
+ /**
+ * A map for split sampling, from an alias to the SplitSample instance
+ * that describes its percentage, row count, or total length.
+ */
+ private final HashMap<String, SplitSample> nameToSplitSample;
+ Map<GroupByOperator, Set<String>> groupOpToInputTables;
+ Map<String, PrunedPartitionList> prunedPartitions;
+ protected List<FieldSchema> resultSchema;
+ protected CreateViewDesc createVwDesc;
+ protected ArrayList<String> viewsExpanded;
+ protected ASTNode viewSelect;
+ protected final UnparseTranslator unparseTranslator;
+ private final GlobalLimitCtx globalLimitCtx;
+
+ // Prefix for column names auto-generated by Hive
+ private final String autogenColAliasPrfxLbl;
+ private final boolean autogenColAliasPrfxIncludeFuncName;
+
+ // Keep track of the view alias to the read entity corresponding to the view.
+ // E.g., for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T,
+ // this keeps track of the aliases for V3, V3:V2, and V3:V2:V1.
+ // It is used when T is added as an input for the query; the parents of T are
+ // derived from the alias V3:V2:V1:T.
+ private final Map<String, ReadEntity> viewAliasToInput;
+
+ // the isDirect flag needs to be merged into the input even if the newInput does not have a parent
+ private boolean mergeIsDirect;
+
+ // flag for no scan during analyze ... compute statistics
+ protected boolean noscan;
+
+ //flag for partial scan during analyze ... compute statistics
+ protected boolean partialscan;
+
+ protected volatile boolean disableJoinMerge = false;
+ protected final boolean defaultJoinMerge;
+
+ /*
+ * Capture the CTE definitions in a Query.
+ */
+ final Map<String, CTEClause> aliasToCTEs;
+
+ /*
+ * Used to check recursive CTE invocations. Similar to viewsExpanded
+ */
+ ArrayList<String> ctesExpanded;
+
+ /*
+ * Whether root tasks after materialized CTE linkage have been resolved
+ */
+ boolean rootTasksResolved;
+
+ protected TableMask tableMask;
+
+ CreateTableDesc tableDesc;
+
+ /** Not thread-safe. */
+ final ASTSearcher astSearcher = new ASTSearcher();
+
+ protected AnalyzeRewriteContext analyzeRewrite;
+
+ // A mapping from a tableName to a table object in metastore.
+ Map<String, Table> tabNameToTabObject;
+
+ // The tokens we should ignore when we are trying to do table masking.
+ private final Set<Integer> ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
+ HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
+ HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
+
+ static class Phase1Ctx {
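+ // dest names the current destination clause (typically of the form "insclause-<n>");
+ // nextNum is the counter doPhase1() uses to generate the next clause name.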
+ String dest;
+ int nextNum;
+ }
+
+ public SemanticAnalyzer(QueryState queryState) throws SemanticException {
+ super(queryState);
+ opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
+ opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
+ opToSamplePruner = new HashMap<TableScanOperator, SampleDesc>();
+ nameToSplitSample = new HashMap<String, SplitSample>();
+ // Must be deterministic order maps - see HIVE-8707
+ topOps = new LinkedHashMap<String, TableScanOperator>();
+ loadTableWork = new ArrayList<LoadTableDesc>();
+ loadFileWork = new ArrayList<LoadFileDesc>();
+ columnStatsAutoGatherContexts = new ArrayList<ColumnStatsAutoGatherContext>();
+ opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
+ joinContext = new HashMap<JoinOperator, QBJoinTree>();
+ smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>();
+ // Must be deterministic order map for consistent q-test output across Java versions
+ topToTable = new LinkedHashMap<TableScanOperator, Table>();
+ fsopToTable = new HashMap<FileSinkOperator, Table>();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>();
+ topToTableProps = new HashMap<TableScanOperator, Map<String, String>>();
+ destTableId = 1;
+ uCtx = null;
+ listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
+ groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
+ prunedPartitions = new HashMap<String, PrunedPartitionList>();
+ tabNameToTabObject = new HashMap<String, Table>();
+ unparseTranslator = new UnparseTranslator(conf);
+ autogenColAliasPrfxLbl = HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
+ autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
+ queryProperties = new QueryProperties();
+ opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
+ aliasToCTEs = new HashMap<String, CTEClause>();
+ globalLimitCtx = new GlobalLimitCtx();
+ viewAliasToInput = new HashMap<String, ReadEntity>();
+ mergeIsDirect = true;
+ noscan = partialscan = false;
+ defaultJoinMerge = !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MERGE_NWAY_JOINS);
+ disableJoinMerge = defaultJoinMerge;
+ }
+
+ @Override
+ protected void reset(boolean clearPartsCache) {
+ super.reset(true);
+ if(clearPartsCache) {
+ prunedPartitions.clear();
+
+ //When init(true) is combined with genResolvedParseTree, it generates a resolved parse tree from the syntax tree.
+ //ReadEntity instances created under these conditions are all relevant to the syntax tree, even the ones without parents,
+ //so set mergeIsDirect to true here.
+ mergeIsDirect = true;
+ } else {
+ mergeIsDirect = false;
+ }
+ tabNameToTabObject.clear();
+ loadTableWork.clear();
+ loadFileWork.clear();
+ columnStatsAutoGatherContexts.clear();
+ topOps.clear();
+ destTableId = 1;
+ idToTableNameMap.clear();
+ qb = null;
+ ast = null;
+ uCtx = null;
+ joinContext.clear();
+ smbMapJoinContext.clear();
+ opParseCtx.clear();
+ groupOpToInputTables.clear();
+ disableJoinMerge = defaultJoinMerge;
+ aliasToCTEs.clear();
+ topToTable.clear();
+ opToPartPruner.clear();
+ opToPartList.clear();
+ opToPartToSkewedPruner.clear();
+ opToSamplePruner.clear();
+ nameToSplitSample.clear();
+ fsopToTable.clear();
+ resultSchema = null;
+ createVwDesc = null;
+ viewsExpanded = null;
+ viewSelect = null;
+ ctesExpanded = null;
+ globalLimitCtx.disableOpt();
+ viewAliasToInput.clear();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
+ topToTableProps.clear();
+ listMapJoinOpsNoReducer.clear();
+ unparseTranslator.clear();
+ queryProperties.clear();
+ outputs.clear();
+ }
+
+ public void initParseCtx(ParseContext pctx) {
+ opToPartPruner = pctx.getOpToPartPruner();
+ opToPartList = pctx.getOpToPartList();
+ opToSamplePruner = pctx.getOpToSamplePruner();
+ topOps = pctx.getTopOps();
+ loadTableWork = pctx.getLoadTableWork();
+ loadFileWork = pctx.getLoadFileWork();
+ ctx = pctx.getContext();
+ destTableId = pctx.getDestTableId();
+ idToTableNameMap = pctx.getIdToTableNameMap();
+ uCtx = pctx.getUCtx();
+ listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
+ prunedPartitions = pctx.getPrunedPartitions();
+ tabNameToTabObject = pctx.getTabNameToTabObject();
+ fetchTask = pctx.getFetchTask();
+ setLineageInfo(pctx.getLineageInfo());
+ }
+
+ public ParseContext getParseContext() {
+ // Make sure the basic query properties are initialized
+ copyInfoToQueryProperties(queryProperties);
+ return new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
+ new HashSet<JoinOperator>(joinContext.keySet()),
+ new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
+ loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx,
+ listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
+ opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
+ opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
+ analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
+ }
+
+ public CompilationOpContext getOpContext() {
+ return ctx.getOpContext();
+ }
+
+ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
+ throws SemanticException {
+ doPhase1QBExpr(ast, qbexpr, id, alias, false);
+ }
+ @SuppressWarnings("nls")
+ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView)
+ throws SemanticException {
+
+ assert (ast.getToken() != null);
+ if (ast.getToken().getType() == HiveParser.TOK_QUERY) {
+ QB qb = new QB(id, alias, true);
+ qb.setInsideView(insideView);
+ Phase1Ctx ctx_1 = initPhase1Ctx();
+ doPhase1(ast, qb, ctx_1, null);
+
+ qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
+ qbexpr.setQB(qb);
+ }
+ // setop
+ else {
+ switch (ast.getToken().getType()) {
+ case HiveParser.TOK_UNIONALL:
+ qbexpr.setOpcode(QBExpr.Opcode.UNION);
+ break;
+ case HiveParser.TOK_INTERSECTALL:
+ qbexpr.setOpcode(QBExpr.Opcode.INTERSECTALL);
+ break;
+ case HiveParser.TOK_INTERSECTDISTINCT:
+ qbexpr.setOpcode(QBExpr.Opcode.INTERSECT);
+ break;
+ case HiveParser.TOK_EXCEPTALL:
+ qbexpr.setOpcode(QBExpr.Opcode.EXCEPTALL);
+ break;
+ case HiveParser.TOK_EXCEPTDISTINCT:
+ qbexpr.setOpcode(QBExpr.Opcode.EXCEPT);
+ break;
+ default:
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg("Type "
+ + ast.getToken().getType()));
+ }
+ // query 1
+ assert (ast.getChild(0) != null);
+ QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
+ doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1, alias
+ + SUBQUERY_TAG_1, insideView);
+ qbexpr.setQBExpr1(qbexpr1);
+
+ // query 2
+ assert (ast.getChild(1) != null);
+ QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
+ doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2, alias
+ + SUBQUERY_TAG_2, insideView);
+ qbexpr.setQBExpr2(qbexpr2);
+ }
+ }
+
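+ // Collects the aggregation subtrees of a SELECT clause, keyed by their
+ // toStringTree() form so that duplicate trees collapse; window-function
+ // invocations are diverted into the destination's WindowingSpec instead.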
+ private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect(
+ ASTNode selExpr, QB qb, String dest) throws SemanticException {
+
+ // Iterate over the select expressions, searching for aggregation trees.
+ // Use String keys to eliminate duplicate trees.
+ LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>();
+ List<ASTNode> wdwFns = new ArrayList<ASTNode>();
+ for (int i = 0; i < selExpr.getChildCount(); ++i) {
+ ASTNode function = (ASTNode) selExpr.getChild(i);
+ if (function.getType() == HiveParser.TOK_SELEXPR ||
+ function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
+ function = (ASTNode)function.getChild(0);
+ }
+ doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
+ }
+
+ // window based aggregations are handled differently
+ for (ASTNode wdwFn : wdwFns) {
+ WindowingSpec spec = qb.getWindowingSpec(dest);
+ if(spec == null) {
+ queryProperties.setHasWindowing(true);
+ spec = new WindowingSpec();
+ qb.addDestToWindowingSpec(dest, spec);
+ }
+ HashMap<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
+ int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
+ WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
+ (ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
+ // If this is a duplicate invocation of a function, don't add it to the WindowingSpec.
+ if ( wExprsInDest != null &&
+ wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
+ continue;
+ }
+ wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
+ spec.addWindowFunction(wFnSpec);
+ qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
+ }
+
+ return aggregationTrees;
+ }
+
+ private void doPhase1GetColumnAliasesFromSelect(
+ ASTNode selectExpr, QBParseInfo qbp) {
+ for (int i = 0; i < selectExpr.getChildCount(); ++i) {
+ ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
+ if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
+ && (selExpr.getChildCount() == 2)) {
+ String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
+ qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
+ }
+ }
+ }
+
+ /**
+ * DFS-scan the expressionTree to find all aggregation subtrees and put them
+ * in aggregations.
+ *
+ * @param expressionTree
+ * @param aggregations
+ * the key to the HashTable is the toStringTree() representation of
+ * the aggregation subtree.
+ * @throws SemanticException
+ */
+ private void doPhase1GetAllAggregations(ASTNode expressionTree,
+ HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
+ int exprTokenType = expressionTree.getToken().getType();
+ if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) {
+ //since we now have scalar subqueries, we can get a subquery expression in HAVING;
+ // we don't want to include aggregates from within the subquery
+ return;
+ }
+
+ if (exprTokenType == HiveParser.TOK_FUNCTION
+ || exprTokenType == HiveParser.TOK_FUNCTIONDI
+ || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
+ assert (expressionTree.getChildCount() != 0);
+ if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
+ == HiveParser.TOK_WINDOWSPEC) {
+ // If it is a windowing spec, we include it in the list.
+ // Further, we will examine its children AST nodes to check whether
+ // there are aggregation functions within.
+ wdwFns.add(expressionTree);
+ doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
+ aggregations, wdwFns);
+ return;
+ }
+ if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
+ String functionName = unescapeIdentifier(expressionTree.getChild(0)
+ .getText());
+ // Validate the function name
+ if (FunctionRegistry.getFunctionInfo(functionName) == null) {
+ throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
+ }
+ if(FunctionRegistry.impliesOrder(functionName)) {
+ throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
+ }
+ if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
+ if(containsLeadLagUDF(expressionTree)) {
+ throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
+ }
+ aggregations.put(expressionTree.toStringTree(), expressionTree);
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
+ if (!fi.isNative()) {
+ unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
+ .getChild(0));
+ }
+ return;
+ }
+ }
+ }
+ for (int i = 0; i < expressionTree.getChildCount(); i++) {
+ doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
+ aggregations, wdwFns);
+ }
+ }
+
+ private List<ASTNode> doPhase1GetDistinctFuncExprs(
+ HashMap<String, ASTNode> aggregationTrees) throws SemanticException {
+ List<ASTNode> exprs = new ArrayList<ASTNode>();
+ for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
+ ASTNode value = entry.getValue();
+ assert (value != null);
+ if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
+ exprs.add(value);
+ }
+ }
+ return exprs;
+ }
+
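+ // E.g. (illustrative): for an AST node at line 4, char position 12, this
+ // returns "4:12 <message>. Error encountered near token '<token text>'".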
+ public static String generateErrorMessage(ASTNode ast, String message) {
+ StringBuilder sb = new StringBuilder();
+ if (ast == null) {
+ sb.append(message).append(". Cannot tell the position of null AST.");
+ return sb.toString();
+ }
+ sb.append(ast.getLine());
+ sb.append(":");
+ sb.append(ast.getCharPositionInLine());
+ sb.append(" ");
+ sb.append(message);
+ sb.append(". Error encountered near token '");
+ sb.append(ErrorMsg.getText(ast));
+ sb.append("'");
+ return sb.toString();
+ }
+
+ ASTNode getAST() {
+ return this.ast;
+ }
+
+ protected void setAST(ASTNode newAST) {
+ this.ast = newAST;
+ }
+
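+ // Locates, among the children of a TOK_TABREF node, the indexes of the alias,
+ // table-properties, bucket-sample and split-sample subtrees; -1 (or 0 for the
+ // alias) means the subtree is absent. Child 0 is always the table name.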
+ int[] findTabRefIdxs(ASTNode tabref) {
+ assert tabref.getType() == HiveParser.TOK_TABREF;
+ int aliasIndex = 0;
+ int propsIndex = -1;
+ int tsampleIndex = -1;
+ int ssampleIndex = -1;
+ for (int index = 1; index < tabref.getChildCount(); index++) {
+ ASTNode ct = (ASTNode) tabref.getChild(index);
+ if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
+ tsampleIndex = index;
+ } else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
+ ssampleIndex = index;
+ } else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
+ propsIndex = index;
+ } else {
+ aliasIndex = index;
+ }
+ }
+ return new int[] {aliasIndex, propsIndex, tsampleIndex, ssampleIndex};
+ }
+ String findSimpleTableName(ASTNode tabref, int aliasIndex) {
+ assert tabref.getType() == HiveParser.TOK_TABREF;
+ ASTNode tableTree = (ASTNode) (tabref.getChild(0));
+
+ String alias;
+ if (aliasIndex != 0) {
+ alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
+ }
+ else {
+ alias = getUnescapedUnqualifiedTableName(tableTree);
+ }
+ return alias;
+ }
+ /**
+ * Goes through the tabref tree and finds the alias for the table. Once found,
+ * it records the table name -> alias association in aliasToTabs. It also makes
+ * an association from the alias to the table AST in parse info.
+ *
+ * @return the alias of the table
+ */
+ private String processTable(QB qb, ASTNode tabref) throws SemanticException {
+ // For each table reference get the table name
+ // and the alias (if alias is not present, the table name
+ // is used as an alias)
+ int[] indexes = findTabRefIdxs(tabref);
+ int aliasIndex = indexes[0];
+ int propsIndex = indexes[1];
+ int tsampleIndex = indexes[2];
+ int ssampleIndex = indexes[3];
+
+ ASTNode tableTree = (ASTNode) (tabref.getChild(0));
+
+ String tabIdName = getUnescapedName(tableTree).toLowerCase();
+
+ String alias = findSimpleTableName(tabref, aliasIndex);
+
+ if (propsIndex >= 0) {
+ Tree propsAST = tabref.getChild(propsIndex);
+ Map<String, String> props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
+ // We get the information from Calcite.
+ if ("TRUE".equals(props.get("insideView"))) {
+ qb.getAliasInsideView().add(alias.toLowerCase());
+ }
+ qb.setTabProps(alias, props);
+ }
+
+ // If the alias is already there then we have a conflict
+ if (qb.exists(alias)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
+ .getChild(aliasIndex)));
+ }
+ if (tsampleIndex >= 0) {
+ ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
+ ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
+ if (sampleClause.getChildCount() > 2) {
+ for (int i = 2; i < sampleClause.getChildCount(); i++) {
+ sampleCols.add((ASTNode) sampleClause.getChild(i));
+ }
+ }
+ // TODO: For now, only support sampling on up to two columns.
+ // Need to change this to a list of columns.
+ if (sampleCols.size() > 2) {
+ throw new SemanticException(generateErrorMessage(
+ (ASTNode) tabref.getChild(0),
+ ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
+ }
+ TableSample tabSample = new TableSample(
+ unescapeIdentifier(sampleClause.getChild(0).getText()),
+ unescapeIdentifier(sampleClause.getChild(1).getText()),
+ sampleCols);
+ qb.getParseInfo().setTabSample(alias, tabSample);
+ if (unparseTranslator.isEnabled()) {
+ for (ASTNode sampleCol : sampleCols) {
+ unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
+ .getChild(0));
+ }
+ }
+ } else if (ssampleIndex >= 0) {
+ ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
+
+ Tree type = sampleClause.getChild(0);
+ Tree numerator = sampleClause.getChild(1);
+ String value = unescapeIdentifier(numerator.getText());
+
+ SplitSample sample;
+ if (type.getType() == HiveParser.TOK_PERCENT) {
+ assertCombineInputFormat(numerator, "Percentage");
+ double percent = Double.parseDouble(value);
+ if (percent < 0 || percent > 100) {
+ throw new SemanticException(generateErrorMessage((ASTNode) numerator,
+ "Sampling percentage should be between 0 and 100"));
+ }
+ int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
+ sample = new SplitSample(percent, seedNum);
+ } else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
+ sample = new SplitSample(Integer.parseInt(value));
+ } else {
+ assert type.getType() == HiveParser.TOK_LENGTH;
+ assertCombineInputFormat(numerator, "Total Length");
+ long length = Integer.parseInt(value.substring(0, value.length() - 1));
+ char last = value.charAt(value.length() - 1);
+ if (last == 'k' || last == 'K') {
+ length <<= 10;
+ } else if (last == 'm' || last == 'M') {
+ length <<= 20;
+ } else if (last == 'g' || last == 'G') {
+ length <<= 30;
+ }
+ int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
+ sample = new SplitSample(length, seedNum);
+ }
+ String alias_id = getAliasId(alias, qb);
+ nameToSplitSample.put(alias_id, sample);
+ }
+ // Insert this map into the stats
+ qb.setTabAlias(alias, tabIdName);
+ if (qb.isInsideView()) {
+ qb.getAliasInsideView().add(alias.toLowerCase());
+ }
+ qb.addAlias(alias);
+
+ qb.getParseInfo().setSrcForAlias(alias, tableTree);
+
+ // if aliasToCTEs contains the table name, we do not do the translation, because
+ // the CTE is actually a subquery.
+ if (!this.aliasToCTEs.containsKey(tabIdName)) {
+ unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
+ if (aliasIndex != 0) {
+ unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
+ }
+ }
+
+ return alias;
+ }
+
+ Map<String, SplitSample> getNameToSplitSampleMap() {
+ return this.nameToSplitSample;
+ }
+
+ /**
+ * Convert a string to Text format and write its bytes in the same way TextOutputFormat would do.
+ * This is needed to properly encode non-ascii characters.
+ */
+ private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
+ Text to = new Text(text);
+ out.write(to.getBytes(), 0, to.getLength());
+ }
+
+ /**
+ * Generate a temp table out of a values clause
+ * See also {@link #preProcessForInsert(ASTNode, QB)}
+ */
+ private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
+ Path dataDir = null;
+ if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
+ //currently only Insert into T values(...) is supported thus only 1 values clause
+ //and only 1 target table are possible. If/when support for
+ //select ... from values(...) is added an insert statement may have multiple
+ //encrypted target tables.
+ dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
+ }
+ // Pick a name for the table
+ SessionState ss = SessionState.get();
+ String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
+
+ // Step 1, parse the values clause we were handed
+ List<? extends Node> fromChildren = originalFrom.getChildren();
+ // First child should be the virtual table ref
+ ASTNode virtualTableRef = (ASTNode)fromChildren.get(0);
+ assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF :
+ "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " +
+ virtualTableRef.getName();
+
+ List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
+ // First child of this should be the table name. If it's anonymous,
+ // then we don't have a table name.
+ ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0);
+ if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
+ // TODO: if you want to make select ... from (values(...)) as foo(...) work,
+ // you need to parse this list of column names and build it into the table
+ throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
+ }
+
+ // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
+ ASTNode valuesTable = (ASTNode)fromChildren.get(1);
+ assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE :
+ "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " +
+ valuesTable.getName();
+ // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
+ List<? extends Node> valuesTableChildren = valuesTable.getChildren();
+
+ // Now that we're going to start reading through the rows, open a file to write the rows to.
+ // If we leave this method before creating the temporary table we need to be sure to clean up
+ // this file.
+ Path tablePath = null;
+ FileSystem fs = null;
+ FSDataOutputStream out = null;
+ try {
+ if(dataDir == null) {
+ tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
+ }
+ else {
+ //if target table of insert is encrypted, make sure temporary table data is stored
+ //similarly encrypted
+ tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
+ }
+ fs = tablePath.getFileSystem(conf);
+ fs.mkdirs(tablePath);
+ Path dataFile = new Path(tablePath, "data_file");
+ out = fs.create(dataFile);
+ List<FieldSchema> fields = new ArrayList<FieldSchema>();
+
+ boolean firstRow = true;
+ for (Node n : valuesTableChildren) {
+ ASTNode valuesRow = (ASTNode) n;
+ assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW :
+ "Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
+ // Each of the children of this should be a literal
+ List<? extends Node> valuesRowChildren = valuesRow.getChildren();
+ boolean isFirst = true;
+ int nextColNum = 1;
+ for (Node n1 : valuesRowChildren) {
+ ASTNode value = (ASTNode) n1;
+ if (firstRow) {
+ fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
+ }
+ if (isFirst) isFirst = false;
+ else writeAsText("\u0001", out);
+ writeAsText(unparseExprForValuesClause(value), out);
+ }
+ writeAsText("\n", out);
+ firstRow = false;
+ }
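+
+ // E.g. (illustrative): a clause VALUES (1, 'a', true) is serialized above as
+ // the ^A (\u0001)-delimited text line "1^Aa^ATRUE" followed by '\n'.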
+
+ // Step 2, create a temp table, using the created file as the data
+ StorageFormat format = new StorageFormat(conf);
+ format.processStorageFormat("TextFile");
+ Table table = db.newTable(tableName);
+ table.setSerializationLib(format.getSerde());
+ table.setFields(fields);
+ table.setDataLocation(tablePath);
+ table.getTTable().setTemporary(true);
+ table.setStoredAsSubDirectories(false);
+ table.setInputFormatClass(format.getInputFormat());
+ table.setOutputFormatClass(format.getOutputFormat());
+ db.createTable(table, false);
+ } catch (Exception e) {
+ String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
+ LOG.error(errMsg);
+ // Try to delete the file
+ if (fs != null && tablePath != null) {
+ try {
+ fs.delete(tablePath, false);
+ } catch (IOException swallowIt) {}
+ }
+ throw new SemanticException(errMsg, e);
+ } finally {
+ IOUtils.closeStream(out);
+ }
+
+ // Step 3, return a new subtree with a from clause built around that temp table
+ // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
+ Token t = new ClassicToken(HiveParser.TOK_TABREF);
+ ASTNode tabRef = new ASTNode(t);
+ t = new ClassicToken(HiveParser.TOK_TABNAME);
+ ASTNode tabNameNode = new ASTNode(t);
+ tabRef.addChild(tabNameNode);
+ t = new ClassicToken(HiveParser.Identifier, tableName);
+ ASTNode identifier = new ASTNode(t);
+ tabNameNode.addChild(identifier);
+ return tabRef;
+ }
+
+ // Take an expression in the values clause and turn it back into a string. This is far from
+ // comprehensive. At the moment it only supports:
+ // * literals (all types)
+ // * unary negatives
+ // * true/false
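+ // Illustrative examples (per the cases below): 42 -> "42", 'ab' -> ab,
+ // -7 -> "-7", TRUE -> "TRUE", FALSE -> "" (an empty string reads back as
+ // false), NULL -> "\N" (Hive's text-format null marker).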
+ private String unparseExprForValuesClause(ASTNode expr) throws SemanticException {
+ switch (expr.getToken().getType()) {
+ case HiveParser.Number:
+ return expr.getText();
+
+ case HiveParser.StringLiteral:
+ return BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
+
+ case HiveParser.KW_FALSE:
+ // UDFToBoolean casts any non-empty string to true, so return an empty string for false
+ return "";
+
+ case HiveParser.KW_TRUE:
+ return "TRUE";
+
+ case HiveParser.MINUS:
+ return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
+
+ case HiveParser.TOK_NULL:
+ // Hive's text input will translate this as a null
+ return "\\N";
+
+ default:
+ throw new SemanticException("Expression of type " + expr.getText() +
+ " not supported in insert/values");
+ }
+
+ }
+
+ private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
+ String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
+ if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
+ throw new SemanticException(generateErrorMessage((ASTNode) numerator,
+ message + " sampling is not supported in " + inputFormat));
+ }
+ }
+
+ private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
+
+ // This is a subquery and must have an alias
+ if (subq.getChildCount() != 2) {
+ throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
+ }
+ ASTNode subqref = (ASTNode) subq.getChild(0);
+ String alias = unescapeIdentifier(subq.getChild(1).getText());
+
+ // Recursively do the first phase of semantic analysis for the subquery
+ QBExpr qbexpr = new QBExpr(alias);
+
+ doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias, qb.isInsideView());
+
+ // If the alias is already there then we have a conflict
+ if (qb.exists(alias)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
+ .getChild(1)));
+ }
+ // Insert this map into the stats
+ qb.setSubqAlias(alias, qbexpr);
+ qb.addAlias(alias);
+
+ unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
+
+ return alias;
+ }
+
+ /*
+ * Phase1: hold onto any CTE definitions in aliasToCTEs.
+ * CTE definitions are global to the Query.
+ */
+ private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
+
+ int numCTEs = ctes.getChildCount();
+
+ for(int i=0; i <numCTEs; i++) {
+ ASTNode cte = (ASTNode) ctes.getChild(i);
+ ASTNode cteQry = (ASTNode) cte.getChild(0);
+ String alias = unescapeIdentifier(cte.getChild(1).getText());
+
+ String qName = qb.getId() == null ? "" : qb.getId() + ":";
+ qName += alias.toLowerCase();
+
+ if ( aliasToCTEs.containsKey(qName)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1)));
+ }
+ aliasToCTEs.put(qName, new CTEClause(qName, cteQry));
+ }
+ }
+
+ /*
+ * We allow CTE definitions in views. So we can end up with a hierarchy of CTE definitions:
+ * - at the top level of a query statement
+ * - where a view is referenced.
+ * - views may refer to other views.
+ *
+ * The scoping rule we use is to search for a CTE from the current QB outwards. In order to
+ * disambiguate between CTEs at different levels, we qualify (prefix) them with the id of the QB
+ * they appear in when adding them to the <code>aliasToCTEs</code> map.
+ *
+ */
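+ // E.g. (illustrative): with a current QB id of "v1:v2" and cteName "q",
+ // findCTEFromName tries "v1:v2:q", then "v1:q", then the unqualified "q".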
+ private CTEClause findCTEFromName(QB qb, String cteName) {
+ StringBuilder qId = new StringBuilder();
+ if (qb.getId() != null) {
+ qId.append(qb.getId());
+ }
+
+ while (qId.length() > 0) {
+ String nm = qId + ":" + cteName;
+ CTEClause cte = aliasToCTEs.get(nm);
+ if (cte != null) {
+ return cte;
+ }
+ int lastIndex = qId.lastIndexOf(":");
+ lastIndex = lastIndex < 0 ? 0 : lastIndex;
+ qId.setLength(lastIndex);
+ }
+ return aliasToCTEs.get(cteName);
+ }
+
+ /*
+ * If a CTE is referenced in a QueryBlock:
+ * - add it as a SubQuery for now.
+ * - SQ.alias is the alias used in the QB. (If no alias is specified,
+ * it uses the CTE name; this works just like table references.)
+ * - Adding SQ done by:
+ * - copying AST of CTE
+ * - setting ASTOrigin on cloned AST.
+ * - trigger phase 1 on new QBExpr.
+ * - update QB data structs: remove this as a table reference, move it to a SQ invocation.
+ */
+ private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
+ throws SemanticException {
+ cteAlias = cteAlias == null ? cteName : cteAlias;
+ CTEClause cte = findCTEFromName(qb, cteName);
+ ASTNode cteQryNode = cte.cteNode;
+ QBExpr cteQBExpr = new QBExpr(cteAlias);
+ doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
+ qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
+ }
+
+ private final CTEClause rootClause = new CTEClause(null, null);
+
+ @Override
+ public List<Task<? extends Serializable>> getAllRootTasks() {
+ if (!rootTasksResolved) {
+ rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
+ rootTasksResolved = true;
+ }
+ return rootTasks;
+ }
+
+ @Override
+ public HashSet<ReadEntity> getAllInputs() {
+ HashSet<ReadEntity> readEntities = new HashSet<ReadEntity>(getInputs());
+ for (CTEClause cte : rootClause.asExecutionOrder()) {
+ if (cte.source != null) {
+ readEntities.addAll(cte.source.getInputs());
+ }
+ }
+ return readEntities;
+ }
+
+ @Override
+ public HashSet<WriteEntity> getAllOutputs() {
+ HashSet<WriteEntity> writeEntities = new HashSet<WriteEntity>(getOutputs());
+ for (CTEClause cte : rootClause.asExecutionOrder()) {
+ if (cte.source != null) {
+ writeEntities.addAll(cte.source.getOutputs());
+ }
+ }
+ return writeEntities;
+ }
+
+ class CTEClause {
+ CTEClause(String alias, ASTNode cteNode) {
+ this.alias = alias;
+ this.cteNode = cteNode;
+ }
+ String alias;
+ ASTNode cteNode;
+ boolean materialize;
+ int reference;
+ QBExpr qbExpr;
+ List<CTEClause> parents = new ArrayList<CTEClause>();
+
+ // materialized
+ Table table;
+ SemanticAnalyzer source;
+
+ List<Task<? extends Serializable>> getTasks() {
+ return source == null ? null : source.rootTasks;
+ }
+
+ List<CTEClause> asExecutionOrder() {
+ List<CTEClause> execution = new ArrayList<CTEClause>();
+ asExecutionOrder(new HashSet<CTEClause>(), execution);
+ return execution;
+ }
+
+ void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) {
+ for (CTEClause parent : parents) {
+ if (visited.add(parent)) {
+ parent.asExecutionOrder(visited, execution);
+ }
+ }
+ execution.add(this);
+ }
+
+ @Override
+ public String toString() {
+ return alias == null ? "<root>" : alias;
+ }
+ }
+
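+ // Chains the task graphs of materialized CTEs in execution order: each CTE's
+ // leaf tasks become prerequisites of the next CTE's roots, the main query's
+ // root tasks are made dependent on the final CTE leaves, and the CTE roots
+ // are returned as the real root tasks.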
+ private List<Task<? extends Serializable>> toRealRootTasks(List<CTEClause> execution) {
+ List<Task<? extends Serializable>> cteRoots = new ArrayList<>();
+ List<Task<? extends Serializable>> cteLeafs = new ArrayList<>();
+ List<Task<? extends Serializable>> curTopRoots = null;
+ List<Task<? extends Serializable>> curBottomLeafs = null;
+ for (int i = 0; i < execution.size(); i++) {
+ CTEClause current = execution.get(i);
+ if (current.parents.isEmpty() && curTopRoots != null) {
+ cteRoots.addAll(curTopRoots);
+ cteLeafs.addAll(curBottomLeafs);
+ curTopRoots = curBottomLeafs = null;
+ }
+ List<Task<? extends Serializable>> curTasks = current.getTasks();
+ if (curTasks == null) {
+ continue;
+ }
+ if (curTopRoots == null) {
+ curTopRoots = curTasks;
+ }
+ if (curBottomLeafs != null) {
+ for (Task<?> topLeafTask : curBottomLeafs) {
+ for (Task<?> currentRootTask : curTasks) {
+ topLeafTask.addDependentTask(currentRootTask);
+ }
+ }
+ }
+ curBottomLeafs = Task.findLeafs(curTasks);
+ }
+ if (curTopRoots != null) {
+ cteRoots.addAll(curTopRoots);
+ cteLeafs.addAll(curBottomLeafs);
+ }
+
+ if (cteRoots.isEmpty()) {
+ return rootTasks;
+ }
+ for (Task<?> cteLeafTask : cteLeafs) {
+ for (Task<?> mainRootTask : rootTasks) {
+ cteLeafTask.addDependentTask(mainRootTask);
+ }
+ }
+ return cteRoots;
+ }
+
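+ // Materializes a CTE by synthesizing a CREATE TEMPORARY TABLE <cteName> AST
+ // around the CTE body and analyzing it with a nested SemanticAnalyzer; the
+ // resulting table is marked materialized and registered with the context.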
+ Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
+
+ ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
+
+ ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
+ tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
+
+ ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
+
+ createTable.addChild(tableName);
+ createTable.addChild(temporary);
+ createTable.addChild(cte.cteNode);
+
+ SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
+ analyzer.initCtx(ctx);
+ analyzer.init(false);
+
+ // should share cte contexts
+ analyzer.aliasToCTEs.putAll(aliasToCTEs);
+
+ HiveOperation operation = queryState.getHiveOperation();
+ try {
+ analyzer.analyzeInternal(createTable);
+ } finally {
+ queryState.setCommandType(operation);
+ }
+
+ Table table = analyzer.tableDesc.toTable(conf);
+ Path location = table.getDataLocation();
+ try {
+ location.getFileSystem(conf).mkdirs(location);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ table.setMaterializedTable(true);
+
+ LOG.info(cteName + " will be materialized into " + location);
+ cte.table = table;
+ cte.source = analyzer;
+
+ ctx.addMaterializedTable(cteName, table);
+
+ return table;
+ }
+
+
+ static boolean isJoinToken(ASTNode node) {
+ if ((node.getToken().getType() == HiveParser.TOK_JOIN)
+ || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
+ || isOuterJoinToken(node)
+ || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ static private boolean isOuterJoinToken(ASTNode node) {
+ return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
+ }
+
+ /**
+ * Given the AST with TOK_JOIN as the root, get all the aliases for the tables
+ * or subqueries in the join.
+ *
+ * @param qb
+ * @param join
+ * @throws SemanticException
+ */
+ @SuppressWarnings("nls")
+ private void processJoin(QB qb, ASTNode join) throws SemanticException {
+ int numChildren = join.getChildCount();
+ if ((numChildren != 2) && (numChildren != 3)
+ && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
+ throw new SemanticException(generateErrorMessage(join,
+ "Join with multiple children"));
+ }
+
+ queryProperties.incrementJoinCount(isOuterJoinToken(join));
+ for (int num = 0; num < numChildren; num++) {
+ ASTNode child = (ASTNode) join.getChild(num);
+ if (child.getToken().getType() == HiveParser.TOK_TABREF) {
+ processTable(qb, child);
+ } else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
+ processSubQuery(qb, child);
+ } else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
+ queryProperties.setHasPTF(true);
+ processPTF(qb, child);
+ PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
+ String inputAlias = ptfInvocationSpec == null ? null :
+ ptfInvocationSpec.getFunction().getAlias();
+ if ( inputAlias == null ) {
+ throw new SemanticException(generateErrorMessage(child,
+ "PTF invocation in a Join must have an alias"));
+ }
+
+ } else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
+ child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+ // SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
+ // is not supported. Instead, the lateral view must be in a subquery
+ // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
+ // JOIN src2 ...
+ throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
+ .getMsg(join));
+ } else if (isJoinToken(child)) {
+ processJoin(qb, child);
+ }
+ }
+ }
+
+ /**
+ * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
+ * table or subquery in the lateral view and also make a mapping from the
+ * alias to all the lateral view AST's.
+ *
+ * @param qb
+ * @param lateralView
+ * @return the alias for the table/subquery
+ * @throws SemanticException
+ */
+ private String processLateralView(QB qb, ASTNode lateralView)
+ throws SemanticException {
+ int numChildren = lateralView.getChildCount();
+
+ assert (numChildren == 2);
+ ASTNode next = (ASTNode) lateralView.getChild(1);
+
+ String alias = null;
+
+ switch (next.getToken().getType()) {
+ case HiveParser.TOK_TABREF:
+ alias = processTable(qb, next);
+ break;
+ case HiveParser.TOK_SUBQUERY:
+ alias = processSubQuery(qb, next);
+ break;
+ case HiveParser.TOK_LATERAL_VIEW:
+ case HiveParser.TOK_LATERAL_VIEW_OUTER:
+ alias = processLateralView(qb, next);
+ break;
+ default:
+ throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
+ .getMsg(lateralView));
+ }
+ alias = alias.toLowerCase();
+ qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
+ qb.addAlias(alias);
+ return alias;
+ }
+
+ /**
+ * Phase 1 (including, but not limited to):
+ *
+ * 1. Gets all the aliases for all the tables / subqueries and makes the
+ * appropriate mapping in aliasToTabs, aliasToSubq.
+ * 2. Gets the location of the destination and names the clause "inclause" + i.
+ * 3. Creates a map from a string representation of an aggregation tree to
+ * the actual aggregation AST.
+ * 4. Creates a mapping from the clause name to the select expression AST in
+ * destToSelExpr.
+ * 5. Creates a mapping from a table alias to the lateral view AST's in
+ * aliasToLateralViews.
+ *
+ * @param ast
+ * @param qb
+ * @param ctx_1
+ * @throws SemanticException
+ */
+ @SuppressWarnings({"fallthrough", "nls"})
+ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
+ throws SemanticException {
+
+ boolean phase1Result = true;
+ QBParseInfo qbp = qb.getParseInfo();
+ boolean skipRecursion = false;
+
+ if (ast.getToken() != null) {
+ skipRecursion = true;
+ switch (ast.getToken().getType()) {
+ case HiveParser.TOK_SELECTDI:
+ qb.countSelDi();
+ // fall through
+ case HiveParser.TOK_SELECT:
+ qb.countSel();
+ qbp.setSelExprForClause(ctx_1.dest, ast);
+
+ int posn = 0;
+ if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
+ ParseDriver pd = new ParseDriver();
+ String queryHintStr = ast.getChild(0).getText();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("QUERY HINT: "+queryHintStr);
+ }
+ try {
+ ASTNode hintNode = pd.parseHint(queryHintStr);
+ qbp.setHints((ASTNode) hintNode);
+ posn++;
+ } catch (ParseException e) {
+ throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
+ }
+ }
+
+ if (ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM) {
+ queryProperties.setUsesScript(true);
+ }
+
+ LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
+ qb, ctx_1.dest);
+ doPhase1GetColumnAliasesFromSelect(ast, qbp);
+ qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
+ qbp.setDistinctFuncExprsForClause(ctx_1.dest,
+ doPhase1GetDistinctFuncExprs(aggregations));
+ break;
+
+ case HiveParser.TOK_WHERE:
+ qbp.setWhrExprForClause(ctx_1.dest, ast);
+ if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) {
+ queryProperties.setFilterWithSubQuery(true);
+ }
+ break;
+
+ case HiveParser.TOK_INSERT_INTO:
+ String currentDatabase = SessionState.get().getCurrentDatabase();
+ String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
+ qbp.addInsertIntoTable(tab_name, ast);
+
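+ // Fall through to TOK_DESTINATION: an INSERT INTO target is also a
+ // destination, so it shares the destination handling below.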
+ case HiveParser.TOK_DESTINATION:
+ ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
+ ctx_1.nextNum++;
+ boolean isTmpFileDest = false;
+ if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
+ ASTNode ch = (ASTNode) ast.getChild(0);
+ if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
+ && ch.getChild(0) instanceof ASTNode) {
+ ch = (ASTNode) ch.getChild(0);
+ isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
+ } else {
+ if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
+ && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
+ String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
+ SessionState.get().getCurrentDatabase());
+ qbp.getInsertOverwriteTables().put(fullTableName, ast);
+ }
+ }
+ }
+
+ // Is there an insert in the subquery?
+ if (qbp.getIsSubQ() && !isTmpFileDest) {
+ throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
+ }
+
+ qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
+ handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
+
+ if (qbp.getClauseNamesForDest().size() == 2) {
+ // From the moment that we have two destination clauses,
+ // we know that this is a multi-insert query.
+ // Thus, set property to right value.
+ // Using qbp.getClauseNamesForDest().size() >= 2 would be
+ // equivalent, but we use == to avoid setting the property
+ // multiple times
+ queryProperties.setMultiDestQuery(true);
+ }
+
+ if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
+ plannerCtx.setInsertToken(ast, isTmpFileDest);
+ } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
+ // For a multi-insert query, currently we only optimize the FROM clause.
+ // Hence, introduce the multi-insert token on top of it.
+ // However, first we need to reset the existing (insert) token.
+ // As above, == 2 is used so the token is only introduced once.
+ plannerCtx.resetToken();
+ plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
+ }
+ break;
+
+ case HiveParser.TOK_FROM:
+ int child_count = ast.getChildCount();
+ if (child_count != 1) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Multiple Children " + child_count));
+ }
+
+ if (!qbp.getIsSubQ()) {
+ qbp.setQueryFromExpr(ast);
+ }
+
+ // Check if this is a subquery / lateral view
+ ASTNode frm = (ASTNode) ast.getChild(0);
+ if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
+ processTable(qb, frm);
+ } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
+ // Create a temp table with the passed values in it then rewrite this portion of the
+ // tree to be from that table.
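+ // e.g. (illustrative) INSERT INTO t VALUES (1, 'a'), (2, 'b') reaches
+ // this point with the literal rows under a TOK_VIRTUAL_TABLE node.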
+ ASTNode newFrom = genValuesTempTable(frm, qb);
+ ast.setChild(0, newFrom);
+ processTable(qb, newFrom);
+ } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
+ processSubQuery(qb, frm);
+ } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
+ frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+ queryProperties.setHasLateralViews(true);
+ processLateralView(qb, frm);
+ } else if (isJoinToken(frm)) {
+ processJoin(qb, frm);
+ qbp.setJoinExpr(frm);
+ } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
+ queryProperties.setHasPTF(true);
+ processPTF(qb, frm);
+ }
+ break;
+
+ case HiveParser.TOK_CLUSTERBY:
+ // Get the clusterby aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasClusterBy(true);
+ qbp.setClusterByExprForClause(ctx_1.dest, ast);
+ break;
+
+ case HiveParser.TOK_DISTRIBUTEBY:
+ // Get the distribute by aliases - these are aliased to the entries in
+ // the select list
+ queryProperties.setHasDistributeBy(true);
+ qbp.setDistributeByExprForClause(ctx_1.dest, ast);
+ if (qbp.getClusterByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
+ } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
+ }
+ break;
+
+ case HiveParser.TOK_SORTBY:
+ // Get the sort by aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasSortBy(true);
+ qbp.setSortByExprForClause(ctx_1.dest, ast);
+ if (qbp.getClusterByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
+ } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
+ }
+
+ break;
+
+ case HiveParser.TOK_ORDERBY:
+ // Get the order by aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasOrderBy(true);
+ qbp.setOrderByExprForClause(ctx_1.dest, ast);
+ if (qbp.getClusterByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
+ }
+ break;
+
+ case HiveParser.TOK_GROUPBY:
+ case HiveParser.TOK_ROLLUP_GROUPBY:
+ case HiveParser.TOK_CUBE_GROUPBY:
+ case HiveParser.TOK_GROUPING_SETS:
+ // Get the groupby aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasGroupBy(true);
+ if (qbp.getJoinExpr() != null) {
+ queryProperties.setHasJoinFollowedByGroupBy(true);
+ }
+ if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
+ }
+ qbp.setGroupByExprForClause(ctx_1.dest, ast);
+
+ // Rollup and Cubes are syntactic sugar on top of grouping sets
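+ // e.g. (illustrative) GROUP BY a, b WITH ROLLUP is equivalent to
+ // GROUP BY a, b GROUPING SETS ((a, b), (a), ())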
+ if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
+ qbp.getDestRollups().add(ctx_1.dest);
+ } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
+ qbp.getDestCubes().add(ctx_1.dest);
+ } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
+ qbp.getDestGroupingSets().add(ctx_1.dest);
+ }
+ break;
+
+ case HiveParser.TOK_HAVING:
+ qbp.setHavingExprForClause(ctx_1.dest, ast);
+ qbp.addAggregationExprsForClause(ctx_1.dest,
+ doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
+ break;
+
+ case HiveParser.KW_WINDOW:
+ if (!qb.hasWindowingSpec(ctx_1.dest) ) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Query has no Cluster/Distribute By; but has a Window definition"));
+ }
+ handleQueryWindowClauses(qb, ctx_1, ast);
+ break;
+
+ case HiveParser.TOK_LIMIT:
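+ // Two children means "LIMIT <offset>,<count>"; a single child means
+ // "LIMIT <count>" with an implicit offset of 0.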
+ if (ast.getChildCount() == 2) {
+ qbp.setDestLimit(ctx_1.dest,
+ Integer.valueOf(ast.getChild(0).getText()),
+ Integer.valueOf(ast.getChild(1).getText()));
+ } else {
+ qbp.setDestLimit(ctx_1.dest, Integer.valueOf(0),
+ Integer.valueOf(ast.getChild(0).getText()));
+ }
+ break;
+
+ case HiveParser.TOK_ANALYZE:
+ // Case of analyze command
+
+ String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
+
+ qb.setTabAlias(table_name, table_name);
+ qb.addAlias(table_name);
+ qb.getParseInfo().setIsAnalyzeCommand(true);
+ qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
+ qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
+ // Allow analyzing the whole table as well as dynamic partitions
+ HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+ HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
+
+ break;
+
+ case HiveParser.TOK_UNIONALL:
+ if (!qbp.getIsSubQ()) {
+ // this shouldn't happen. The parser should have converted the union to be
+ // contained in a subquery. Just in case, we keep the error as a fallback.
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
+ }
+ skipRecursion = false;
+ break;
+
+ case HiveParser.TOK_INSERT:
+ ASTNode destination = (ASTNode) ast.getChild(0);
+ Tree tab = destination.getChild(0);
+
+ // Proceed only if the AST contains both a partition spec and IF NOT EXISTS
+ if (destination.getChildCount() == 2 &&
+ tab.getChildCount() == 2 &&
+ destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
+ String tableName = tab.getChild(0).getChild(0).getText();
+
+ Tree partitions = tab.getChild(1);
+ int childCount = partitions.getChildCount();
+ HashMap<String, String> partition = new HashMap<String, String>();
+ for (int i = 0; i < childCount; i++) {
+ String partitionName = partitions.getChild(i).getChild(0).getText();
+ Tree pvalue = partitions.getChild(i).getChild(1);
+ if (pvalue == null) {
+ break;
+ }
+ String partitionVal = stripQuotes(pvalue.getText());
+ partition.put(partitionName, partitionVal);
+ }
+ // If any partition column lacks a value, this is a dynamic partition
+ // spec, which is not supported with IF NOT EXISTS
+ if (childCount != partition.size()) {
+ throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
+ .getMsg(partition.toString()));
+ }
+ Table table = null;
+ try {
+ table = this.getTableObjectByName(tableName);
+ } catch (HiveException ex) {
+ throw new SemanticException(ex);
+ }
+ try {
+ Partition parMetaData = db.getPartition(table, partition, false);
+ // Check whether the partition exists; if it does, skip the overwrite
+ if (parMetaData != null) {
+ phase1Result = false;
+ skipRecursion = true;
+ LOG.info("Partition already exists so insert into overwrite " +
+ "skipped for partition : " + parMetaData.toString());
+ break;
+ }
+ } catch (HiveException e) {
+ LOG.info("Error while getting metadata : ", e);
+ }
+ validatePartSpec(table, partition, (ASTNode)tab, conf, false);
+ }
+ skipRecursion = false;
+ break;
+ case HiveParser.TOK_LATERAL_VIEW:
+ case HiveParser.TOK_LATERAL_VIEW_OUTER:
+ // TODO: support nested lateral views
+ assert ast.getChildCount() == 1;
+ qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
+ break;
+ case HiveParser.TOK_CTE:
+ processCTE(qb, ast);
+ break;
+ default:
+ skipRecursion = false;
+ break;
+ }
+ }
+
+ if (!skipRecursion) {
+ // Iterate over the rest of the children
+ int child_count = ast.getChildCount();
+ for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
+ // Recurse
+ phase1Result = phase1Result && doPhase1(
+ (ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
+ }
+ }
+ return phase1Result;
+ }
+
+ /**
+ * Phase 1 of supporting a specified column schema in an insert statement, e.g.
+ * insert into foo(z,y) select a,b from bar;
+ * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
+ * @throws SemanticException
+ */
+ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
+ ASTNode tabColName = (ASTNode)ast.getChild(1);
+ if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
+ //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
+ List<String> targetColNames = new ArrayList<String>();
+ for(Node col : tabColName.getChildren()) {
+ assert ((ASTNode)col).getType() == HiveParser.Identifier :
+ "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
+ targetColNames.add(((ASTNode)col).getText());
+ }
+ String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
+ SessionState.get().getCurrentDatabase());
+ qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
+ Set<String> targetColumns = new HashSet<String>();
+ targetColumns.addAll(targetColNames);
+ if(targetColNames.size() != targetColumns.size()) {
+ throw new SemanticException(generateErrorMessage(tabColName,
+ "Duplicate column name detected in " + fullTableName + " table schema specification"));
+ }
+ Table targetTable = null;
+ try {
+ targetTable = db.getTable(fullTableName, false);
+ }
+ catch (HiveException ex) {
+ LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
+ throw new SemanticException(ex);
+ }
+ if(targetTable == null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Unable to access metadata for table " + fullTableName));
+ }
+ for(FieldSchema f : targetTable.getCols()) {
+ //parser only allows foo(a,b), not foo(foo.a, foo.b)
+ targetColumns.remove(f.getName());
+ }
+ if (!targetColumns.isEmpty()) {
+ // Here we need to see if the remaining columns are dynamic partition columns
+ /* We just checked the user-specified schema columns against the regular table columns and found
+ some which are not 'regular'. Now check if they are dynamic partition columns.
+ For dynamic partitioning,
+ Given "create table multipart(a int, b int) partitioned by (c int, d int);"
+ for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this
+ (TOK_INSERT_INTO
+ (TOK_TAB
+ (TOK_TABNAME multipart)
+ (TOK_PARTSPEC
+ (TOK_PARTVAL c '1')
+ (TOK_PARTVAL d)
+ )
+ )
+ (TOK_TABCOLNAME d a)
+ )*/
+ List<String> dynamicPartitionColumns = new ArrayList<String>();
+ if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
+ ASTNode tokTab = (ASTNode)ast.getChild(0);
+ ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
+ if(tokPartSpec != null) {
+ for(Node n : tokPartSpec.getChildren()) {
+ ASTNode tokPartVal = null;
+ if(n instanceof ASTNode) {
+ tokPartVal = (ASTNode)n;
+ }
+ if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) {
+ assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
+ "Expected column name; found tokType=" + tokPartVal.getType();
+ dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
+ }
+ }
+ }
+ }
+ for(String colName : dynamicPartitionColumns) {
+ targetColumns.remove(colName);
+ }
+ if(!targetColumns.isEmpty()) {
+ //Found some columns in user specified schema which are neither regular nor dynamic partition columns
+ throw new SemanticException(generateErrorMessage(tabColName,
+ "'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
+ "' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
+ " not found among regular columns of " +
+ fullTableName + " nor dynamic partition columns."));
+ }
+ }
+ }
+ }
+
+ public void getMaterializationMetadata(QB qb) throws SemanticException {
+ try {
+ gatherCTEReferences(qb, rootClause);
+ int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
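+ // A CTE is materialized once its reference count reaches the configured
+ // threshold; a negative threshold disables materialization entirely.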
+ for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
+ if (threshold >= 0 && cte.reference >= threshold) {
+ cte.materialize = true;
+ }
+ }
+ } catch (HiveException e) {
+ // Has to use full name to make sure it does not conflict with
+ // org.apache.commons.lang.StringUtils
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ if (e instanceof SemanticException) {
+ throw (SemanticException)e;
+ }
+ throw new SemanticException(e.getMessage(), e);
+ }
+ }
+
+ private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
+ if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+ gatherCTEReferences(qbexpr.getQB(), parent);
+ } else {
+ gatherCTEReferences(qbexpr.getQBExpr1(), parent);
+ gatherCTEReferences(qbexpr.getQBExpr2(), parent);
+ }
+ }
+
+ // TODO: check view references, too
+ private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
+ for (String alias : qb.getTabAliases()) {
+ String tabName = qb.getTabNameForAlias(alias);
+ String cteName = tabName.toLowerCase();
+
+ CTEClause cte = findCTEFromName(qb, cteName);
+ if (cte != null) {
+ if (ctesExpanded.contains(cteName)) {
+ throw new SemanticException("Recursive cte " + cteName +
+ " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
+ " -> " + cteName + ").");
+ }
+ cte.reference++;
+ current.parents.add(cte);
+ if (cte.qbExpr != null) {
+ continue;
+ }
+ cte.qbExpr = new QBExpr(cteName);
+ doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
+
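+ // ctesExpanded acts as a stack of the CTE chain currently being
+ // expanded; the cycle check above relies on it to detect recursion.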
+ ctesExpanded.add(cteName);
+ gatherCTEReferences(cte.qbExpr, cte);
+ ctesExpanded.remove(ctesExpanded.size() - 1);
+ }
+ }
+ for (String alias : qb.getSubqAliases()) {
+ gatherCTEReferences(qb.getSubqForAlias(alias), current);
+ }
+ }
+
+ public void getMetaData(QB qb) throws SemanticException {
+ getMetaData(qb, false);
+ }
+
+ public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
+ try {
+ if (enableMaterialization) {
+ getMaterializationMetadata(qb);
+ }
+ getMetaData(qb, null);
+ } catch (HiveException e) {
+ // Has to use full name to make sure it does not conflict with
+ // org.apache.commons.lang.StringUtils
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ if (e instanceof SemanticException) {
+ throw (SemanticException)e;
+ }
+ throw new SemanticException(e.getMessage(), e);
+ }
+ }
+
+ private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
+ throws HiveException {
+ if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+ getMetaData(qbexpr.getQB(), parentInput);
+ } else {
+ getMetaData(qbexpr.getQBExpr1(), parentInput);
+ getMetaData(qbexpr.getQBExpr2(), parentInput);
+ }
+ }
+
+ @SuppressWarnings("nls")
+ private void getMetaData(QB qb, ReadEntity parentInput)
+ throws HiveException {
+ LOG.info("Get metadata for source tables");
+
+ // Go over the tables and populate the related structures.
+ // We have to materialize the table alias list since we might
+ // modify it in the middle for view rewrite.
+ List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
+
+ // Keep track of view alias to view name and read entity
+ // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
+ // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
+ // This is needed for tracking the dependencies for inputs, along with their parents.
+ Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
+ new HashMap<String, ObjectPair<String, ReadEntity>>();
+
+ /*
+ * used to capture view to SQ conversions. This is used to check for
+ * recursive CTE invocations.
+ */
+ Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
+
+ for (String alias : tabAliases) {
+ String tabName = qb.getTabNameForAlias(alias);
+ String cteName = tabName.toLowerCase();
+
+ Table tab = db.getTable(tabName, false);
+ if (tab == null ||
+ tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
+ Table materializedTab = ctx.getMaterializedTable(cteName);
+ if (materializedTab == null) {
+ // We first look for this alias as a CTE, and then in the catalog.
+ CTEClause cte = findCTEFromName(qb, cteName);
+ if (cte != null) {
+ if (!cte.materialize) {
+ addCTEAsSubQuery(qb, cteName, alias);
+ sqAliasToCTEName.put(alias, cteName);
+ continue;
+ }
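+ // The reference count reached the materialization threshold, so the
+ // CTE is materialized into a temporary table rather than inlined.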
+ tab = materializeCTE(cteName, cte);
+ }
+ } else {
+ tab = materializedTab;
+ }
+ }
+
+ if (tab == null) {
+ ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
+ if (null != src) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
+ } else {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
+ }
+ }
+ if (tab.isView()) {
+ if (qb.getParseInfo().isAnalyzeCommand()) {
+ throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
+ }
+ String fullViewName = tab.getDbName() + "." + tab.getTableName();
+ // Prevent view cycles
+ if (viewsExpanded.contains(fullViewName)) {
+ throw new SemanticException("Recursive view " + fullViewName +
+ " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
+ " -> " + fullViewName + ").");
+ }
+ replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
+ // This is the last time we'll see the Table objects for views, so add it to the inputs
+ // now. isInsideView tells whether this view is embedded in another view.
+ // If the view is inside another view, it should have at least one parent.
+ if (qb.isInsideView() && parentInput == null) {
+ parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
+ }
+ ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
+ viewInput = PlanUtils.addInput(inputs, viewInput);
+ aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
+ String aliasId = getAliasId(alias, qb);
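+ // Normalize the alias id by stripping subquery tags before recording
+ // the view input, so nested references resolve to the same entry.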
+ if (aliasId != null) {
+ aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
+ .replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
+ }
+ viewAliasToInput.put(aliasId, viewInput);
+ continue;
+ }
+
+ if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
+ throw new SemanticException(generateErrorMessage(
+ qb.getParseInfo().getSrcForAlias(alias),
+ ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
+ }
+
+ qb.getMetaData().setSrcForAlias(alias, tab);
+
+ if (qb.getParseInfo().isAnalyzeCommand()) {
+ // allow partial partition specification for noscan since noscan is fast.
+ TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
+ if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
+ try {
+ ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
+ } catch (HiveException e) {
+ throw new SemanticException(generateErrorMessage(
+ qb.getParseInfo().getSrcForAlias(alias),
+ "Cannot get partitions for " + ts.partSpec), e);
+ }
+ }
+ // validate partial scan command
+ QBParseInfo qbpi = qb.getParseInfo();
+ if (qbpi.isPartialScanAnalyzeCommand()) {
+ Class<? extends InputFormat> inputFormatClass = null;
+ switch (ts.specType) {
+ case TABLE_ONLY:
+ case DYNAMIC_PARTITION:
+ inputFormatClass = ts.tableHandle.getInputFormatClass();
+ break;
+ case STATIC_PARTITION:
+ inputFormatClass = ts.partHandle.getInputFormatClass();
+ break;
+ default:
+ assert false;
+ }
+ // throw a SemanticException for formats other than RCFile or ORC.
+ if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
+ .equals(OrcInputFormat.class))) {
+ throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
+ }
+ }
+
+ tab.setTableSpec(ts);
+ qb.getParseInfo().addTableSpec(alias, ts);
+ }
+
+ ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
+ // Temporary tables created during the execution are not the input sources
+ if (!PlanUtils.isValuesTempTable(alias)) {
+ PlanUtils.addInput(inputs,
+ new ReadEntity(tab, parentViewInfo, parentViewInfo == null),mergeIsDirect);
+ }
+ }
+
+ LOG.info("Get metadata for subqueries");
+ // Go over the subqueries and getMetaData for these
+ for (String alias : qb.getSubqAliases()) {
+ boolean wasView = aliasToViewInfo.containsKey(alias);
+ boolean wasCTE = sqAliasToCTEName.containsKey(alias);
+ ReadEntity newParentInput = null;
+ if (wasView) {
+ viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
+ newParentInput = aliasToViewInfo.get(alias).getSecond();
+ } else if (wasCTE) {
+ ctesExpanded.add(sqAliasToCTEName.get(alias));
+ }
<TRUNCATED>