Posted to commits@hive.apache.org by ha...@apache.org on 2017/04/25 15:45:53 UTC
[1/3] hive git commit: HIVE-16501 : Add rej/orig to .gitignore ;
remove *.orig files (Zoltan Haindrich via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master f1e0b4fc1 -> c911f4203
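
The .gitignore hunk itself is in another part of this 3-part series; this part
only removes a stale *.orig file. Judging from the commit summary, the new
ignore entries are presumably along these lines (a hypothetical reconstruction
from the summary, not the actual hunk):

    *.orig
    *.rej

These patterns cover the backup and reject files that merge and patch tools
leave behind, so they are not committed again by accident.
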
http://git-wip-us.apache.org/repos/asf/hive/blob/c911f420/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
deleted file mode 100644
index b5a5645..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
+++ /dev/null
@@ -1,13508 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.parse;
-
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.Serializable;
-import java.security.AccessControlException;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Queue;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.UUID;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-
-import org.antlr.runtime.ClassicToken;
-import org.antlr.runtime.CommonToken;
-import org.antlr.runtime.Token;
-import org.antlr.runtime.tree.Tree;
-import org.antlr.runtime.tree.TreeVisitor;
-import org.antlr.runtime.tree.TreeVisitorAction;
-import org.antlr.runtime.tree.TreeWizard;
-import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
-import org.apache.calcite.rel.RelNode;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.ObjectPair;
-import org.apache.hadoop.hive.common.StatsSetupConst;
-import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.TableType;
-import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Order;
-import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
-import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
-import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.QueryProperties;
-import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.RecordReader;
-import org.apache.hadoop.hive.ql.exec.RecordWriter;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.hooks.ReadEntity;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
-import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
-import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
-import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
-import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.metadata.DummyPartition;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
-import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
-import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.Optimizer;
-import org.apache.hadoop.hive.ql.optimizer.Transform;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
-import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
-import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType;
-import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
-import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
-import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
-import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
-import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
-import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
-import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
-import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
-import org.apache.hadoop.hive.ql.plan.DDLWork;
-import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
-import org.apache.hadoop.hive.ql.plan.ForwardDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.HiveOperation;
-import org.apache.hadoop.hive.ql.plan.InsertTableDesc;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
-import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
-import org.apache.hadoop.hive.ql.plan.LimitDesc;
-import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
-import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
-import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PTFDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.plan.ScriptDesc;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.plan.UnionDesc;
-import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
-import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
-import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
-import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.ql.util.ResourceDownloader;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
-import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
-import org.apache.hadoop.hive.serde2.NullStructSerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.shims.HadoopShims;
-import org.apache.hadoop.hive.shims.Utils;
-import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.security.UserGroupInformation;
-
-import com.google.common.base.Splitter;
-import com.google.common.base.Strings;
-import com.google.common.collect.Sets;
-import com.google.common.math.IntMath;
-
-/**
- * Implementation of the semantic analyzer. It generates the query plan.
- * There are other specific semantic analyzers for some Hive operations, such as
- * DDLSemanticAnalyzer for DDL operations.
- */
-
-public class SemanticAnalyzer extends BaseSemanticAnalyzer {
-
- public static final String DUMMY_DATABASE = "_dummy_database";
- public static final String DUMMY_TABLE = "_dummy_table";
- public static final String SUBQUERY_TAG_1 = "-subquery1";
- public static final String SUBQUERY_TAG_2 = "-subquery2";
-
- // Max characters when auto generating the column name with func name
- private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
-
- public static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
-
- static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
-
- private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
- private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
- protected HashMap<String, TableScanOperator> topOps;
- protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
- private List<LoadTableDesc> loadTableWork;
- private List<LoadFileDesc> loadFileWork;
- private List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts;
- private final Map<JoinOperator, QBJoinTree> joinContext;
- private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
- private final HashMap<TableScanOperator, Table> topToTable;
- private final Map<FileSinkOperator, Table> fsopToTable;
- private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
- private final HashMap<TableScanOperator, Map<String, String>> topToTableProps;
- private QB qb;
- private ASTNode ast;
- private int destTableId;
- private UnionProcContext uCtx;
- List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
- private HashMap<TableScanOperator, SampleDesc> opToSamplePruner;
- private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
- private Map<SelectOperator, Table> viewProjectToTableSchema;
- /**
- * A map for split sampling, from alias to an instance of SplitSample
- * that describes the sample (a percentage, row count, or total length).
- */
- private final HashMap<String, SplitSample> nameToSplitSample;
- Map<GroupByOperator, Set<String>> groupOpToInputTables;
- Map<String, PrunedPartitionList> prunedPartitions;
- protected List<FieldSchema> resultSchema;
- protected CreateViewDesc createVwDesc;
- protected ArrayList<String> viewsExpanded;
- protected ASTNode viewSelect;
- protected final UnparseTranslator unparseTranslator;
- private final GlobalLimitCtx globalLimitCtx;
-
- // prefix for column names auto generated by hive
- private final String autogenColAliasPrfxLbl;
- private final boolean autogenColAliasPrfxIncludeFuncName;
-
- // Keeps track of the mapping from a view alias to the read entity corresponding to the view.
- // E.g., for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T,
- // it keeps track of aliases for V3, V3:V2, and V3:V2:V1.
- // This is used when T is added as an input for the query; the parents of T are
- // derived from the alias V3:V2:V1:T.
- private final Map<String, ReadEntity> viewAliasToInput;
-
- // We need to merge the isDirect flag into the input even if the newInput does not have a parent.
- private boolean mergeIsDirect;
-
- // flag for no scan during analyze ... compute statistics
- protected boolean noscan;
-
- // flag for partial scan during analyze ... compute statistics
- protected boolean partialscan;
-
- protected volatile boolean disableJoinMerge = false;
- protected final boolean defaultJoinMerge;
-
- /*
- * Capture the CTE definitions in a Query.
- */
- final Map<String, CTEClause> aliasToCTEs;
-
- /*
- * Used to check for recursive CTE invocations. Similar to viewsExpanded.
- */
- ArrayList<String> ctesExpanded;
-
- /*
- * Whether root tasks after materialized CTE linkage have been resolved
- */
- boolean rootTasksResolved;
-
- protected TableMask tableMask;
-
- CreateTableDesc tableDesc;
-
- /** Not thread-safe. */
- final ASTSearcher astSearcher = new ASTSearcher();
-
- protected AnalyzeRewriteContext analyzeRewrite;
-
- // A mapping from a tableName to a table object in metastore.
- Map<String, Table> tabNameToTabObject;
-
- // The tokens we should ignore when we are trying to do table masking.
- private final Set<Integer> ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
- HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
- HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
-
- static class Phase1Ctx {
- String dest;
- int nextNum;
- }
-
- public SemanticAnalyzer(QueryState queryState) throws SemanticException {
- super(queryState);
- opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
- opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
- opToSamplePruner = new HashMap<TableScanOperator, SampleDesc>();
- nameToSplitSample = new HashMap<String, SplitSample>();
- // Must be deterministic order maps - see HIVE-8707
- topOps = new LinkedHashMap<String, TableScanOperator>();
- loadTableWork = new ArrayList<LoadTableDesc>();
- loadFileWork = new ArrayList<LoadFileDesc>();
- columnStatsAutoGatherContexts = new ArrayList<ColumnStatsAutoGatherContext>();
- opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
- joinContext = new HashMap<JoinOperator, QBJoinTree>();
- smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>();
- // Must be deterministic order map for consistent q-test output across Java versions
- topToTable = new LinkedHashMap<TableScanOperator, Table>();
- fsopToTable = new HashMap<FileSinkOperator, Table>();
- reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>();
- topToTableProps = new HashMap<TableScanOperator, Map<String, String>>();
- destTableId = 1;
- uCtx = null;
- listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
- groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
- prunedPartitions = new HashMap<String, PrunedPartitionList>();
- tabNameToTabObject = new HashMap<String, Table>();
- unparseTranslator = new UnparseTranslator(conf);
- autogenColAliasPrfxLbl = HiveConf.getVar(conf,
- HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
- autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
- HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
- queryProperties = new QueryProperties();
- opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
- aliasToCTEs = new HashMap<String, CTEClause>();
- globalLimitCtx = new GlobalLimitCtx();
- viewAliasToInput = new HashMap<String, ReadEntity>();
- mergeIsDirect = true;
- noscan = partialscan = false;
- defaultJoinMerge = !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MERGE_NWAY_JOINS);
- disableJoinMerge = defaultJoinMerge;
- }
-
- @Override
- protected void reset(boolean clearPartsCache) {
- super.reset(true);
- if(clearPartsCache) {
- prunedPartitions.clear();
-
- // When init(true) is combined with genResolvedParseTree, it will generate a resolved parse
- // tree from the syntax tree. ReadEntities created under these conditions should all be
- // relevant to the syntax tree, even the ones without parents, so set mergeIsDirect to true here.
- mergeIsDirect = true;
- } else {
- mergeIsDirect = false;
- }
- tabNameToTabObject.clear();
- loadTableWork.clear();
- loadFileWork.clear();
- columnStatsAutoGatherContexts.clear();
- topOps.clear();
- destTableId = 1;
- idToTableNameMap.clear();
- qb = null;
- ast = null;
- uCtx = null;
- joinContext.clear();
- smbMapJoinContext.clear();
- opParseCtx.clear();
- groupOpToInputTables.clear();
- disableJoinMerge = defaultJoinMerge;
- aliasToCTEs.clear();
- topToTable.clear();
- opToPartPruner.clear();
- opToPartList.clear();
- opToPartToSkewedPruner.clear();
- opToSamplePruner.clear();
- nameToSplitSample.clear();
- fsopToTable.clear();
- resultSchema = null;
- createVwDesc = null;
- viewsExpanded = null;
- viewSelect = null;
- ctesExpanded = null;
- globalLimitCtx.disableOpt();
- viewAliasToInput.clear();
- reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
- topToTableProps.clear();
- listMapJoinOpsNoReducer.clear();
- unparseTranslator.clear();
- queryProperties.clear();
- outputs.clear();
- }
-
- public void initParseCtx(ParseContext pctx) {
- opToPartPruner = pctx.getOpToPartPruner();
- opToPartList = pctx.getOpToPartList();
- opToSamplePruner = pctx.getOpToSamplePruner();
- topOps = pctx.getTopOps();
- loadTableWork = pctx.getLoadTableWork();
- loadFileWork = pctx.getLoadFileWork();
- ctx = pctx.getContext();
- destTableId = pctx.getDestTableId();
- idToTableNameMap = pctx.getIdToTableNameMap();
- uCtx = pctx.getUCtx();
- listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
- prunedPartitions = pctx.getPrunedPartitions();
- tabNameToTabObject = pctx.getTabNameToTabObject();
- fetchTask = pctx.getFetchTask();
- setLineageInfo(pctx.getLineageInfo());
- }
-
- public ParseContext getParseContext() {
- // Make sure the basic query properties are initialized
- copyInfoToQueryProperties(queryProperties);
- return new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
- new HashSet<JoinOperator>(joinContext.keySet()),
- new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
- loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx,
- listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
- opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
- opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
- analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
- }
-
- public CompilationOpContext getOpContext() {
- return ctx.getOpContext();
- }
-
- public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
- throws SemanticException {
- doPhase1QBExpr(ast, qbexpr, id, alias, false);
- }
- @SuppressWarnings("nls")
- public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView)
- throws SemanticException {
-
- assert (ast.getToken() != null);
- if (ast.getToken().getType() == HiveParser.TOK_QUERY) {
- QB qb = new QB(id, alias, true);
- qb.setInsideView(insideView);
- Phase1Ctx ctx_1 = initPhase1Ctx();
- doPhase1(ast, qb, ctx_1, null);
-
- qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
- qbexpr.setQB(qb);
- }
- // setop
- else {
- switch (ast.getToken().getType()) {
- case HiveParser.TOK_UNIONALL:
- qbexpr.setOpcode(QBExpr.Opcode.UNION);
- break;
- case HiveParser.TOK_INTERSECTALL:
- qbexpr.setOpcode(QBExpr.Opcode.INTERSECTALL);
- break;
- case HiveParser.TOK_INTERSECTDISTINCT:
- qbexpr.setOpcode(QBExpr.Opcode.INTERSECT);
- break;
- case HiveParser.TOK_EXCEPTALL:
- qbexpr.setOpcode(QBExpr.Opcode.EXCEPTALL);
- break;
- case HiveParser.TOK_EXCEPTDISTINCT:
- qbexpr.setOpcode(QBExpr.Opcode.EXCEPT);
- break;
- default:
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg("Type "
- + ast.getToken().getType()));
- }
- // query 1
- assert (ast.getChild(0) != null);
- QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
- doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1, alias
- + SUBQUERY_TAG_1, insideView);
- qbexpr.setQBExpr1(qbexpr1);
-
- // query 2
- assert (ast.getChild(1) != null);
- QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
- doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2, alias
- + SUBQUERY_TAG_2, insideView);
- qbexpr.setQBExpr2(qbexpr2);
- }
- }
-
- private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect(
- ASTNode selExpr, QB qb, String dest) throws SemanticException {
-
- // Iterate over the select expressions searching for aggregation trees.
- // Use Strings as keys to eliminate duplicate trees.
- LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>();
- List<ASTNode> wdwFns = new ArrayList<ASTNode>();
- for (int i = 0; i < selExpr.getChildCount(); ++i) {
- ASTNode function = (ASTNode) selExpr.getChild(i);
- if (function.getType() == HiveParser.TOK_SELEXPR ||
- function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
- function = (ASTNode)function.getChild(0);
- }
- doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
- }
-
- // window-based aggregations are handled differently
- for (ASTNode wdwFn : wdwFns) {
- WindowingSpec spec = qb.getWindowingSpec(dest);
- if(spec == null) {
- queryProperties.setHasWindowing(true);
- spec = new WindowingSpec();
- qb.addDestToWindowingSpec(dest, spec);
- }
- HashMap<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
- int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
- WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
- (ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
- // If this is a duplicate invocation of a function, don't add it to the WindowingSpec.
- if ( wExprsInDest != null &&
- wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
- continue;
- }
- wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
- spec.addWindowFunction(wFnSpec);
- qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
- }
-
- return aggregationTrees;
- }
-
- private void doPhase1GetColumnAliasesFromSelect(
- ASTNode selectExpr, QBParseInfo qbp) {
- for (int i = 0; i < selectExpr.getChildCount(); ++i) {
- ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
- if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
- && (selExpr.getChildCount() == 2)) {
- String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
- qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
- }
- }
- }
-
- /**
- * DFS-scan the expressionTree to find all aggregation subtrees and put them
- * in aggregations.
- *
- * @param expressionTree
- * @param aggregations
- * the map to populate; its keys are the toStringTree() representations
- * of the aggregation subtrees.
- * @throws SemanticException
- */
- private void doPhase1GetAllAggregations(ASTNode expressionTree,
- HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
- int exprTokenType = expressionTree.getToken().getType();
- if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) {
- // Since we now have scalar subqueries, we can get a subquery expression in HAVING;
- // we don't want to include aggregates from within the subquery.
- return;
- }
-
- if (exprTokenType == HiveParser.TOK_FUNCTION
- || exprTokenType == HiveParser.TOK_FUNCTIONDI
- || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
- assert (expressionTree.getChildCount() != 0);
- if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
- == HiveParser.TOK_WINDOWSPEC) {
- // If it is a windowing spec, we include it in the list.
- // Further, we will examine its child AST nodes to check whether
- // there are aggregation functions within.
- wdwFns.add(expressionTree);
- doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
- aggregations, wdwFns);
- return;
- }
- if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
- String functionName = unescapeIdentifier(expressionTree.getChild(0)
- .getText());
- // Validate the function name
- if (FunctionRegistry.getFunctionInfo(functionName) == null) {
- throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
- }
- if(FunctionRegistry.impliesOrder(functionName)) {
- throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
- }
- if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
- if(containsLeadLagUDF(expressionTree)) {
- throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
- }
- aggregations.put(expressionTree.toStringTree(), expressionTree);
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
- if (!fi.isNative()) {
- unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
- .getChild(0));
- }
- return;
- }
- }
- }
- for (int i = 0; i < expressionTree.getChildCount(); i++) {
- doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
- aggregations, wdwFns);
- }
- }
-
- private List<ASTNode> doPhase1GetDistinctFuncExprs(
- HashMap<String, ASTNode> aggregationTrees) throws SemanticException {
- List<ASTNode> exprs = new ArrayList<ASTNode>();
- for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
- ASTNode value = entry.getValue();
- assert (value != null);
- if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
- exprs.add(value);
- }
- }
- return exprs;
- }
-
- public static String generateErrorMessage(ASTNode ast, String message) {
- StringBuilder sb = new StringBuilder();
- if (ast == null) {
- sb.append(message).append(". Cannot tell the position of null AST.");
- return sb.toString();
- }
- sb.append(ast.getLine());
- sb.append(":");
- sb.append(ast.getCharPositionInLine());
- sb.append(" ");
- sb.append(message);
- sb.append(". Error encountered near token '");
- sb.append(ErrorMsg.getText(ast));
- sb.append("'");
- return sb.toString();
- }
-
- ASTNode getAST() {
- return this.ast;
- }
-
- protected void setAST(ASTNode newAST) {
- this.ast = newAST;
- }
-
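- /**
- * Scans the children of a TOK_TABREF node and returns the child indexes of the
- * alias, table properties, bucket sample and split sample clauses, in that order;
- * propsIndex/tsampleIndex/ssampleIndex are -1 (and aliasIndex is 0) when the
- * corresponding child is absent.
- */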
- int[] findTabRefIdxs(ASTNode tabref) {
- assert tabref.getType() == HiveParser.TOK_TABREF;
- int aliasIndex = 0;
- int propsIndex = -1;
- int tsampleIndex = -1;
- int ssampleIndex = -1;
- for (int index = 1; index < tabref.getChildCount(); index++) {
- ASTNode ct = (ASTNode) tabref.getChild(index);
- if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
- tsampleIndex = index;
- } else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
- ssampleIndex = index;
- } else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
- propsIndex = index;
- } else {
- aliasIndex = index;
- }
- }
- return new int[] {aliasIndex, propsIndex, tsampleIndex, ssampleIndex};
- }
- String findSimpleTableName(ASTNode tabref, int aliasIndex) {
- assert tabref.getType() == HiveParser.TOK_TABREF;
- ASTNode tableTree = (ASTNode) (tabref.getChild(0));
-
- String alias;
- if (aliasIndex != 0) {
- alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
- }
- else {
- alias = getUnescapedUnqualifiedTableName(tableTree);
- }
- return alias;
- }
- /**
- * Goes through the tabref tree and finds the alias for the table. Once found,
- * it records the table name -> alias association in aliasToTabs. It also makes
- * an association from the alias to the table AST in the parse info.
- *
- * @return the alias of the table
- */
- private String processTable(QB qb, ASTNode tabref) throws SemanticException {
- // For each table reference get the table name
- // and the alias (if alias is not present, the table name
- // is used as an alias)
- int[] indexes = findTabRefIdxs(tabref);
- int aliasIndex = indexes[0];
- int propsIndex = indexes[1];
- int tsampleIndex = indexes[2];
- int ssampleIndex = indexes[3];
-
- ASTNode tableTree = (ASTNode) (tabref.getChild(0));
-
- String tabIdName = getUnescapedName(tableTree).toLowerCase();
-
- String alias = findSimpleTableName(tabref, aliasIndex);
-
- if (propsIndex >= 0) {
- Tree propsAST = tabref.getChild(propsIndex);
- Map<String, String> props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
- // We get the information from Calcite.
- if ("TRUE".equals(props.get("insideView"))) {
- qb.getAliasInsideView().add(alias.toLowerCase());
- }
- qb.setTabProps(alias, props);
- }
-
- // If the alias is already there then we have a conflict
- if (qb.exists(alias)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
- .getChild(aliasIndex)));
- }
- if (tsampleIndex >= 0) {
- ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
- ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
- if (sampleClause.getChildCount() > 2) {
- for (int i = 2; i < sampleClause.getChildCount(); i++) {
- sampleCols.add((ASTNode) sampleClause.getChild(i));
- }
- }
- // TODO: For now only support sampling on up to two columns
- // Need to change it to list of columns
- if (sampleCols.size() > 2) {
- throw new SemanticException(generateErrorMessage(
- (ASTNode) tabref.getChild(0),
- ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
- }
- TableSample tabSample = new TableSample(
- unescapeIdentifier(sampleClause.getChild(0).getText()),
- unescapeIdentifier(sampleClause.getChild(1).getText()),
- sampleCols);
- qb.getParseInfo().setTabSample(alias, tabSample);
- if (unparseTranslator.isEnabled()) {
- for (ASTNode sampleCol : sampleCols) {
- unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
- .getChild(0));
- }
- }
- } else if (ssampleIndex >= 0) {
- ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
-
- Tree type = sampleClause.getChild(0);
- Tree numerator = sampleClause.getChild(1);
- String value = unescapeIdentifier(numerator.getText());
-
-
- SplitSample sample;
- if (type.getType() == HiveParser.TOK_PERCENT) {
- assertCombineInputFormat(numerator, "Percentage");
- double percent = Double.parseDouble(value);
- if (percent < 0 || percent > 100) {
- throw new SemanticException(generateErrorMessage((ASTNode) numerator,
- "Sampling percentage should be between 0 and 100"));
- }
- int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
- sample = new SplitSample(percent, seedNum);
- } else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
- sample = new SplitSample(Integer.parseInt(value));
- } else {
- assert type.getType() == HiveParser.TOK_LENGTH;
- assertCombineInputFormat(numerator, "Total Length");
- long length = Integer.parseInt(value.substring(0, value.length() - 1));
- char last = value.charAt(value.length() - 1);
- if (last == 'k' || last == 'K') {
- length <<= 10;
- } else if (last == 'm' || last == 'M') {
- length <<= 20;
- } else if (last == 'g' || last == 'G') {
- length <<= 30;
- }
- int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
- sample = new SplitSample(length, seedNum);
- }
- String alias_id = getAliasId(alias, qb);
- nameToSplitSample.put(alias_id, sample);
- }
- // Insert this map into the stats
- qb.setTabAlias(alias, tabIdName);
- if (qb.isInsideView()) {
- qb.getAliasInsideView().add(alias.toLowerCase());
- }
- qb.addAlias(alias);
-
- qb.getParseInfo().setSrcForAlias(alias, tableTree);
-
- // If aliasToCTEs contains the table name, we do not do the translation because
- // the CTE is actually a subquery.
- if (!this.aliasToCTEs.containsKey(tabIdName)) {
- unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
- if (aliasIndex != 0) {
- unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
- }
- }
-
- return alias;
- }
-
- Map<String, SplitSample> getNameToSplitSampleMap() {
- return this.nameToSplitSample;
- }
-
- /**
- * Convert a string to Text format and write its bytes in the same way TextOutputFormat would.
- * This is needed to properly encode non-ASCII characters.
- */
- private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
- Text to = new Text(text);
- out.write(to.getBytes(), 0, to.getLength());
- }
-
- /**
- * Generate a temp table out of a values clause.
- * See also {@link #preProcessForInsert(ASTNode, QB)}.
- */
- private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
- Path dataDir = null;
- if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
- // Currently only INSERT INTO T VALUES(...) is supported, thus only one values clause
- // and only one target table are possible. If/when support for
- // SELECT ... FROM VALUES(...) is added, an insert statement may have multiple
- // encrypted target tables.
- dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
- }
- // Pick a name for the table
- SessionState ss = SessionState.get();
- String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
-
- // Step 1, parse the values clause we were handed
- List<? extends Node> fromChildren = originalFrom.getChildren();
- // First child should be the virtual table ref
- ASTNode virtualTableRef = (ASTNode)fromChildren.get(0);
- assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF :
- "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " +
- virtualTableRef.getName();
-
- List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
- // First child of this should be the table name. If it's anonymous,
- // then we don't have a table name.
- ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0);
- if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
- // TODO: if you want to make select ... from (values(...)) as foo(...) work,
- // you need to parse this list of column names and build it into the table.
- throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
- }
-
- // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
- ASTNode valuesTable = (ASTNode)fromChildren.get(1);
- assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE :
- "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " +
- valuesTable.getName();
- // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
- List<? extends Node> valuesTableChildren = valuesTable.getChildren();
-
- // Now that we're going to start reading through the rows, open a file to write the rows to.
- // If we leave this method before creating the temporary table we need to be sure to clean up
- // this file.
- Path tablePath = null;
- FileSystem fs = null;
- FSDataOutputStream out = null;
- try {
- if(dataDir == null) {
- tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
- }
- else {
- //if target table of insert is encrypted, make sure temporary table data is stored
- //similarly encrypted
- tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
- }
- fs = tablePath.getFileSystem(conf);
- fs.mkdirs(tablePath);
- Path dataFile = new Path(tablePath, "data_file");
- out = fs.create(dataFile);
- List<FieldSchema> fields = new ArrayList<FieldSchema>();
-
- boolean firstRow = true;
- for (Node n : valuesTableChildren) {
- ASTNode valuesRow = (ASTNode) n;
- assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW :
- "Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
- // Each of the children of this should be a literal
- List<? extends Node> valuesRowChildren = valuesRow.getChildren();
- boolean isFirst = true;
- int nextColNum = 1;
- for (Node n1 : valuesRowChildren) {
- ASTNode value = (ASTNode) n1;
- if (firstRow) {
- fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
- }
- if (isFirst) {
- isFirst = false;
- } else {
- writeAsText("\u0001", out);
- }
- writeAsText(unparseExprForValuesClause(value), out);
- }
- writeAsText("\n", out);
- firstRow = false;
- }
-
- // Step 2, create a temp table, using the created file as the data
- StorageFormat format = new StorageFormat(conf);
- format.processStorageFormat("TextFile");
- Table table = db.newTable(tableName);
- table.setSerializationLib(format.getSerde());
- table.setFields(fields);
- table.setDataLocation(tablePath);
- table.getTTable().setTemporary(true);
- table.setStoredAsSubDirectories(false);
- table.setInputFormatClass(format.getInputFormat());
- table.setOutputFormatClass(format.getOutputFormat());
- db.createTable(table, false);
- } catch (Exception e) {
- String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
- LOG.error(errMsg);
- // Try to delete the file
- if (fs != null && tablePath != null) {
- try {
- fs.delete(tablePath, false);
- } catch (IOException swallowIt) {}
- }
- throw new SemanticException(errMsg, e);
- } finally {
- IOUtils.closeStream(out);
- }
-
- // Step 3, return a new subtree with a from clause built around that temp table
- // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
- Token t = new ClassicToken(HiveParser.TOK_TABREF);
- ASTNode tabRef = new ASTNode(t);
- t = new ClassicToken(HiveParser.TOK_TABNAME);
- ASTNode tabNameNode = new ASTNode(t);
- tabRef.addChild(tabNameNode);
- t = new ClassicToken(HiveParser.Identifier, tableName);
- ASTNode identifier = new ASTNode(t);
- tabNameNode.addChild(identifier);
- return tabRef;
- }
-
- // Take an expression in the values clause and turn it back into a string. This is far from
- // comprehensive. At the moment it only supports:
- // * literals (all types)
- // * unary negatives
- // * true/false
- private String unparseExprForValuesClause(ASTNode expr) throws SemanticException {
- switch (expr.getToken().getType()) {
- case HiveParser.Number:
- return expr.getText();
-
- case HiveParser.StringLiteral:
- return BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
-
- case HiveParser.KW_FALSE:
- // UDFToBoolean casts any non-empty string to true, so set this to false
- return "";
-
- case HiveParser.KW_TRUE:
- return "TRUE";
-
- case HiveParser.MINUS:
- return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
-
- case HiveParser.TOK_NULL:
- // Hive's text input will translate this as a null
- return "\\N";
-
- default:
- throw new SemanticException("Expression of type " + expr.getText() +
- " not supported in insert/values");
- }
-
- }
-
- private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
- String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
- HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
- HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
- if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
- throw new SemanticException(generateErrorMessage((ASTNode) numerator,
- message + " sampling is not supported in " + inputFormat));
- }
- }
-
- private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
-
- // This is a subquery and must have an alias
- if (subq.getChildCount() != 2) {
- throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
- }
- ASTNode subqref = (ASTNode) subq.getChild(0);
- String alias = unescapeIdentifier(subq.getChild(1).getText());
-
- // Recursively do the first phase of semantic analysis for the subquery
- QBExpr qbexpr = new QBExpr(alias);
-
- doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias, qb.isInsideView());
-
- // If the alias is already there then we have a conflict
- if (qb.exists(alias)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
- .getChild(1)));
- }
- // Insert this map into the stats
- qb.setSubqAlias(alias, qbexpr);
- qb.addAlias(alias);
-
- unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
-
- return alias;
- }
-
- /*
- * Phase 1: hold onto any CTE definitions in aliasToCTEs.
- * CTE definitions are global to the Query.
- */
- private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
-
- int numCTEs = ctes.getChildCount();
-
- for (int i = 0; i < numCTEs; i++) {
- ASTNode cte = (ASTNode) ctes.getChild(i);
- ASTNode cteQry = (ASTNode) cte.getChild(0);
- String alias = unescapeIdentifier(cte.getChild(1).getText());
-
- String qName = qb.getId() == null ? "" : qb.getId() + ":";
- qName += alias.toLowerCase();
-
- if ( aliasToCTEs.containsKey(qName)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1)));
- }
- aliasToCTEs.put(qName, new CTEClause(qName, cteQry));
- }
- }
-
- /*
- * We allow CTE definitions in views. So we can end up with a hierarchy of CTE definitions:
- * - at the top level of a query statement,
- * - where a view is referenced,
- * - and views may refer to other views.
- *
- * The scoping rule we use is to search for a CTE from the current QB outwards. In order to
- * disambiguate between CTEs at different levels, we qualify (prefix) them with the id of the
- * QB they appear in when adding them to the <code>aliasToCTEs</code> map; e.g. a CTE "c"
- * defined in query block "qb1" is stored under the key "qb1:c".
- *
- */
- private CTEClause findCTEFromName(QB qb, String cteName) {
- StringBuilder qId = new StringBuilder();
- if (qb.getId() != null) {
- qId.append(qb.getId());
- }
-
- while (qId.length() > 0) {
- String nm = qId + ":" + cteName;
- CTEClause cte = aliasToCTEs.get(nm);
- if (cte != null) {
- return cte;
- }
- int lastIndex = qId.lastIndexOf(":");
- lastIndex = lastIndex < 0 ? 0 : lastIndex;
- qId.setLength(lastIndex);
- }
- return aliasToCTEs.get(cteName);
- }
-
- /*
- * If a CTE is referenced in a QueryBlock:
- * - add it as a SubQuery for now.
- * - SQ.alias is the alias used in the QB (if no alias is specified,
- * the CTE name is used, just like for table references).
- * - Adding the SQ is done by:
- * - copying AST of CTE
- * - setting ASTOrigin on cloned AST.
- * - trigger phase 1 on new QBExpr.
- * - update QB data structs: remove this as a table reference, move it to a SQ invocation.
- */
- private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
- throws SemanticException {
- cteAlias = cteAlias == null ? cteName : cteAlias;
- CTEClause cte = findCTEFromName(qb, cteName);
- ASTNode cteQryNode = cte.cteNode;
- QBExpr cteQBExpr = new QBExpr(cteAlias);
- doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
- qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
- }
-
- private final CTEClause rootClause = new CTEClause(null, null);
-
- @Override
- public List<Task<? extends Serializable>> getAllRootTasks() {
- if (!rootTasksResolved) {
- rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
- rootTasksResolved = true;
- }
- return rootTasks;
- }
-
- @Override
- public HashSet<ReadEntity> getAllInputs() {
- HashSet<ReadEntity> readEntities = new HashSet<ReadEntity>(getInputs());
- for (CTEClause cte : rootClause.asExecutionOrder()) {
- if (cte.source != null) {
- readEntities.addAll(cte.source.getInputs());
- }
- }
- return readEntities;
- }
-
- @Override
- public HashSet<WriteEntity> getAllOutputs() {
- HashSet<WriteEntity> writeEntities = new HashSet<WriteEntity>(getOutputs());
- for (CTEClause cte : rootClause.asExecutionOrder()) {
- if (cte.source != null) {
- writeEntities.addAll(cte.source.getOutputs());
- }
- }
- return writeEntities;
- }
-
- class CTEClause {
- CTEClause(String alias, ASTNode cteNode) {
- this.alias = alias;
- this.cteNode = cteNode;
- }
- String alias;
- ASTNode cteNode;
- boolean materialize;
- int reference;
- QBExpr qbExpr;
- List<CTEClause> parents = new ArrayList<CTEClause>();
-
- // materialized
- Table table;
- SemanticAnalyzer source;
-
- List<Task<? extends Serializable>> getTasks() {
- return source == null ? null : source.rootTasks;
- }
-
- List<CTEClause> asExecutionOrder() {
- List<CTEClause> execution = new ArrayList<CTEClause>();
- asExecutionOrder(new HashSet<CTEClause>(), execution);
- return execution;
- }
-
- void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) {
- for (CTEClause parent : parents) {
- if (visited.add(parent)) {
- parent.asExecutionOrder(visited, execution);
- }
- }
- execution.add(this);
- }
-
- @Override
- public String toString() {
- return alias == null ? "<root>" : alias;
- }
- }
-
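- /**
- * Stitches the tasks of the materialized CTEs together in execution order (each
- * CTE's leaf tasks feed the next CTE's root tasks) and makes the query's original
- * root tasks depend on the final CTE leaf tasks. Returns the new root tasks, or
- * the original root tasks if no CTE produced any tasks.
- */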
- private List<Task<? extends Serializable>> toRealRootTasks(List<CTEClause> execution) {
- List<Task<? extends Serializable>> cteRoots = new ArrayList<>();
- List<Task<? extends Serializable>> cteLeafs = new ArrayList<>();
- List<Task<? extends Serializable>> curTopRoots = null;
- List<Task<? extends Serializable>> curBottomLeafs = null;
- for (int i = 0; i < execution.size(); i++) {
- CTEClause current = execution.get(i);
- if (current.parents.isEmpty() && curTopRoots != null) {
- cteRoots.addAll(curTopRoots);
- cteLeafs.addAll(curBottomLeafs);
- curTopRoots = curBottomLeafs = null;
- }
- List<Task<? extends Serializable>> curTasks = current.getTasks();
- if (curTasks == null) {
- continue;
- }
- if (curTopRoots == null) {
- curTopRoots = curTasks;
- }
- if (curBottomLeafs != null) {
- for (Task<?> bottomLeafTask : curBottomLeafs) {
- for (Task<?> currentRootTask : curTasks) {
- bottomLeafTask.addDependentTask(currentRootTask);
- }
- }
- }
- curBottomLeafs = Task.findLeafs(curTasks);
- }
- if (curTopRoots != null) {
- cteRoots.addAll(curTopRoots);
- cteLeafs.addAll(curBottomLeafs);
- }
-
- if (cteRoots.isEmpty()) {
- return rootTasks;
- }
- for (Task<?> cteLeafTask : cteLeafs) {
- for (Task<?> mainRootTask : rootTasks) {
- cteLeafTask.addDependentTask(mainRootTask);
- }
- }
- return cteRoots;
- }
-
- Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
-
- ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
-
- ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
- tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
-
- ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
-
- createTable.addChild(tableName);
- createTable.addChild(temporary);
- createTable.addChild(cte.cteNode);
-
- SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
- analyzer.initCtx(ctx);
- analyzer.init(false);
-
- // should share cte contexts
- analyzer.aliasToCTEs.putAll(aliasToCTEs);
-
- HiveOperation operation = queryState.getHiveOperation();
- try {
- analyzer.analyzeInternal(createTable);
- } finally {
- queryState.setCommandType(operation);
- }
-
- Table table = analyzer.tableDesc.toTable(conf);
- Path location = table.getDataLocation();
- try {
- location.getFileSystem(conf).mkdirs(location);
- } catch (IOException e) {
- throw new HiveException(e);
- }
- table.setMaterializedTable(true);
-
- LOG.info(cteName + " will be materialized into " + location);
- cte.table = table;
- cte.source = analyzer;
-
- ctx.addMaterializedTable(cteName, table);
-
- return table;
- }
-
-
- static boolean isJoinToken(ASTNode node) {
- return (node.getToken().getType() == HiveParser.TOK_JOIN)
- || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
- || isOuterJoinToken(node)
- || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
- || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN);
- }
-
- private static boolean isOuterJoinToken(ASTNode node) {
- return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
- }
-
- /**
- * Given the AST with TOK_JOIN as the root, get all the aliases for the tables
- * or subqueries in the join.
- *
- * @param qb
- * @param join
- * @throws SemanticException
- */
- @SuppressWarnings("nls")
- private void processJoin(QB qb, ASTNode join) throws SemanticException {
- int numChildren = join.getChildCount();
- if ((numChildren != 2) && (numChildren != 3)
- && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
- throw new SemanticException(generateErrorMessage(join,
- "Join with multiple children"));
- }
-
- queryProperties.incrementJoinCount(isOuterJoinToken(join));
- for (int num = 0; num < numChildren; num++) {
- ASTNode child = (ASTNode) join.getChild(num);
- if (child.getToken().getType() == HiveParser.TOK_TABREF) {
- processTable(qb, child);
- } else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
- processSubQuery(qb, child);
- } else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
- queryProperties.setHasPTF(true);
- processPTF(qb, child);
- PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
- String inputAlias = ptfInvocationSpec == null ? null :
- ptfInvocationSpec.getFunction().getAlias();
- if (inputAlias == null) {
- throw new SemanticException(generateErrorMessage(child,
- "PTF invocation in a Join must have an alias"));
- }
-
- } else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
- child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
- // SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
- // is not supported. Instead, the lateral view must be in a subquery
- // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
- // JOIN src2 ...
- throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
- .getMsg(join));
- } else if (isJoinToken(child)) {
- processJoin(qb, child);
- }
- }
- }
-
- /**
- * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
- * table or subquery in the lateral view and also make a mapping from the
- * alias to all the lateral view AST's.
- *
- * @param qb
- * @param lateralView
- * @return the alias for the table/subquery
- * @throws SemanticException
- */
-
- private String processLateralView(QB qb, ASTNode lateralView)
- throws SemanticException {
- int numChildren = lateralView.getChildCount();
-
- assert (numChildren == 2);
- ASTNode next = (ASTNode) lateralView.getChild(1);
-
- String alias = null;
-
- switch (next.getToken().getType()) {
- case HiveParser.TOK_TABREF:
- alias = processTable(qb, next);
- break;
- case HiveParser.TOK_SUBQUERY:
- alias = processSubQuery(qb, next);
- break;
- case HiveParser.TOK_LATERAL_VIEW:
- case HiveParser.TOK_LATERAL_VIEW_OUTER:
- alias = processLateralView(qb, next);
- break;
- default:
- throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
- .getMsg(lateralView));
- }
- alias = alias.toLowerCase();
- qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
- qb.addAlias(alias);
- return alias;
- }
-
- /**
- * Phase 1 (including, but not limited to):
- *
- * 1. Gets all the aliases for all the tables / subqueries and makes the
- * appropriate mapping in aliasToTabs, aliasToSubq.
- * 2. Gets the location of the destination and names the clause "inclause" + i.
- * 3. Creates a map from a string representation of an aggregation tree to the
- * actual aggregation AST.
- * 4. Creates a mapping from the clause name to the select expression AST in
- * destToSelExpr.
- * 5. Creates a mapping from a table alias to the lateral view AST's in
- * aliasToLateralViews.
- *
- * @param ast
- * @param qb
- * @param ctx_1
- * @throws SemanticException
- */
- @SuppressWarnings({"fallthrough", "nls"})
- public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
- throws SemanticException {
-
- boolean phase1Result = true;
- QBParseInfo qbp = qb.getParseInfo();
- boolean skipRecursion = false;
-
- if (ast.getToken() != null) {
- skipRecursion = true;
- switch (ast.getToken().getType()) {
- case HiveParser.TOK_SELECTDI:
- qb.countSelDi();
- // fall through
- case HiveParser.TOK_SELECT:
- qb.countSel();
- qbp.setSelExprForClause(ctx_1.dest, ast);
-
- int posn = 0;
- if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
- ParseDriver pd = new ParseDriver();
- String queryHintStr = ast.getChild(0).getText();
- if (LOG.isDebugEnabled()) {
- LOG.debug("QUERY HINT: " + queryHintStr);
- }
- try {
- ASTNode hintNode = pd.parseHint(queryHintStr);
- qbp.setHints(hintNode);
- posn++;
- } catch (ParseException e) {
- throw new SemanticException("failed to parse query hint: " + e.getMessage(), e);
- }
- }
-
- if (ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM) {
- queryProperties.setUsesScript(true);
- }
-
- LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
- qb, ctx_1.dest);
- doPhase1GetColumnAliasesFromSelect(ast, qbp);
- qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
- qbp.setDistinctFuncExprsForClause(ctx_1.dest,
- doPhase1GetDistinctFuncExprs(aggregations));
- break;
-
- case HiveParser.TOK_WHERE:
- qbp.setWhrExprForClause(ctx_1.dest, ast);
- if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) {
- queryProperties.setFilterWithSubQuery(true);
- }
- break;
-
- case HiveParser.TOK_INSERT_INTO:
- String currentDatabase = SessionState.get().getCurrentDatabase();
- String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
- qbp.addInsertIntoTable(tab_name, ast);
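- // fall through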
-
- case HiveParser.TOK_DESTINATION:
- ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
- ctx_1.nextNum++;
- boolean isTmpFileDest = false;
- if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
- ASTNode ch = (ASTNode) ast.getChild(0);
- if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
- && ch.getChild(0) instanceof ASTNode) {
- ch = (ASTNode) ch.getChild(0);
- isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
- } else {
- if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
- && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
- String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
- SessionState.get().getCurrentDatabase());
- qbp.getInsertOverwriteTables().put(fullTableName, ast);
- }
- }
- }
-
- // is there an insert in the subquery?
- if (qbp.getIsSubQ() && !isTmpFileDest) {
- throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
- }
-
- qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
- handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
-
- if (qbp.getClauseNamesForDest().size() == 2) {
- // The moment we have two destination clauses, we know that this is
- // a multi-insert query, so set the property accordingly.
- // Using qbp.getClauseNamesForDest().size() >= 2 would be equivalent,
- // but == avoids setting the property multiple times.
- queryProperties.setMultiDestQuery(true);
- }
-
- if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
- plannerCtx.setInsertToken(ast, isTmpFileDest);
- } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
- // For a multi-insert query, currently we only optimize the FROM clause,
- // so introduce the multi-insert token on top of it. First, we need to
- // reset the existing (insert) token. Checking size() == 2 ensures this
- // is done exactly once, when the second destination clause is seen.
- plannerCtx.resetToken();
- plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
- }
- break;
-
- case HiveParser.TOK_FROM:
- int child_count = ast.getChildCount();
- if (child_count != 1) {
- throw new SemanticException(generateErrorMessage(ast,
- "Multiple Children " + child_count));
- }
-
- if (!qbp.getIsSubQ()) {
- qbp.setQueryFromExpr(ast);
- }
-
- // Check if this is a subquery / lateral view
- ASTNode frm = (ASTNode) ast.getChild(0);
- if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
- processTable(qb, frm);
- } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
- // Create a temp table with the passed values in it then rewrite this portion of the
- // tree to be from that table.
- ASTNode newFrom = genValuesTempTable(frm, qb);
- ast.setChild(0, newFrom);
- processTable(qb, newFrom);
- } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
- processSubQuery(qb, frm);
- } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
- frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
- queryProperties.setHasLateralViews(true);
- processLateralView(qb, frm);
- } else if (isJoinToken(frm)) {
- processJoin(qb, frm);
- qbp.setJoinExpr(frm);
- } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
- queryProperties.setHasPTF(true);
- processPTF(qb, frm);
- }
- break;
-
- case HiveParser.TOK_CLUSTERBY:
- // Get the clusterby aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasClusterBy(true);
- qbp.setClusterByExprForClause(ctx_1.dest, ast);
- break;
-
- case HiveParser.TOK_DISTRIBUTEBY:
- // Get the distribute by aliases - these are aliased to the entries
- // in the select list
- queryProperties.setHasDistributeBy(true);
- qbp.setDistributeByExprForClause(ctx_1.dest, ast);
- if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
- } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
- }
- break;
-
- case HiveParser.TOK_SORTBY:
- // Get the sort by aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasSortBy(true);
- qbp.setSortByExprForClause(ctx_1.dest, ast);
- if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
- } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
- }
-
- break;
-
- case HiveParser.TOK_ORDERBY:
- // Get the order by aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasOrderBy(true);
- qbp.setOrderByExprForClause(ctx_1.dest, ast);
- if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
- }
- break;
-
- case HiveParser.TOK_GROUPBY:
- case HiveParser.TOK_ROLLUP_GROUPBY:
- case HiveParser.TOK_CUBE_GROUPBY:
- case HiveParser.TOK_GROUPING_SETS:
- // Get the groupby aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasGroupBy(true);
- if (qbp.getJoinExpr() != null) {
- queryProperties.setHasJoinFollowedByGroupBy(true);
- }
- if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
- }
- qbp.setGroupByExprForClause(ctx_1.dest, ast);
- skipRecursion = true;
-
- // Rollup and Cubes are syntactic sugar on top of grouping sets
- if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
- qbp.getDestRollups().add(ctx_1.dest);
- } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
- qbp.getDestCubes().add(ctx_1.dest);
- } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
- qbp.getDestGroupingSets().add(ctx_1.dest);
- }
- break;
-
- case HiveParser.TOK_HAVING:
- qbp.setHavingExprForClause(ctx_1.dest, ast);
- qbp.addAggregationExprsForClause(ctx_1.dest,
- doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
- break;
-
- case HiveParser.KW_WINDOW:
- if (!qb.hasWindowingSpec(ctx_1.dest) ) {
- throw new SemanticException(generateErrorMessage(ast,
- "Query has no Cluster/Distribute By; but has a Window definition"));
- }
- handleQueryWindowClauses(qb, ctx_1, ast);
- break;
-
- case HiveParser.TOK_LIMIT:
- if (ast.getChildCount() == 2) {
- qbp.setDestLimit(ctx_1.dest,
- Integer.valueOf(ast.getChild(0).getText()),
- Integer.valueOf(ast.getChild(1).getText()));
- } else {
- qbp.setDestLimit(ctx_1.dest, Integer.valueOf(0),
- Integer.valueOf(ast.getChild(0).getText()));
- }
- break;
-
- case HiveParser.TOK_ANALYZE:
- // Case of analyze command
-
- String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
-
- qb.setTabAlias(table_name, table_name);
- qb.addAlias(table_name);
- qb.getParseInfo().setIsAnalyzeCommand(true);
- qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
- qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
- // Allow analyzing the whole table and dynamic partitions
- HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
- HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
-
- break;
-
- case HiveParser.TOK_UNIONALL:
- if (!qbp.getIsSubQ()) {
- // this shouldn't happen. The parser should have converted the union to be
- // contained in a subquery. Just in case, we keep the error as a fallback.
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
- }
- skipRecursion = false;
- break;
-
- case HiveParser.TOK_INSERT:
- ASTNode destination = (ASTNode) ast.getChild(0);
- Tree tab = destination.getChild(0);
-
- // Proceed if the AST contains a partition spec and IF NOT EXISTS
- if (destination.getChildCount() == 2 &&
- tab.getChildCount() == 2 &&
- destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
- String tableName = tab.getChild(0).getChild(0).getText();
-
- Tree partitions = tab.getChild(1);
- int childCount = partitions.getChildCount();
- HashMap<String, String> partition = new HashMap<String, String>();
- for (int i = 0; i < childCount; i++) {
- String partitionName = partitions.getChild(i).getChild(0).getText();
- Tree pvalue = partitions.getChild(i).getChild(1);
- if (pvalue == null) {
- break;
- }
- String partitionVal = stripQuotes(pvalue.getText());
- partition.put(partitionName, partitionVal);
- }
- // if it is a dynamic partition, throw an exception
- if (childCount != partition.size()) {
- throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
- .getMsg(partition.toString()));
- }
- Table table = null;
- try {
- table = this.getTableObjectByName(tableName);
- } catch (HiveException ex) {
- throw new SemanticException(ex);
- }
- try {
- Partition parMetaData = db.getPartition(table, partition, false);
- // Check whether the partition exists; if it does, skip the overwrite
- if (parMetaData != null) {
- phase1Result = false;
- skipRecursion = true;
- LOG.info("Partition already exists, so insert overwrite is " +
- "skipped for partition: " + parMetaData.toString());
- break;
- }
- } catch (HiveException e) {
- LOG.info("Error while getting metadata : ", e);
- }
- validatePartSpec(table, partition, (ASTNode)tab, conf, false);
- }
- skipRecursion = false;
- break;
- case HiveParser.TOK_LATERAL_VIEW:
- case HiveParser.TOK_LATERAL_VIEW_OUTER:
- // TODO: nested LV
- assert ast.getChildCount() == 1;
- qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
- break;
- case HiveParser.TOK_CTE:
- processCTE(qb, ast);
- break;
- default:
- skipRecursion = false;
- break;
- }
- }
-
- if (!skipRecursion) {
- // Iterate over the rest of the children
- int child_count = ast.getChildCount();
- for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
- // Recurse
- phase1Result = phase1Result && doPhase1(
- (ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
- }
- }
- return phase1Result;
- }
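
For orientation, phase 1 is driven the same way throughout this patch; a
minimal sketch of the calling convention, mirroring the calls made in
genOPTree of CalcitePlanner further below (the error message here is
hypothetical, and ast is assumed to be a resolved query AST):

    Phase1Ctx ctx_1 = initPhase1Ctx();
    if (!doPhase1(ast, getQB(), ctx_1, null)) {
      // Phase 1 can veto further processing, e.g. for INSERT ... IF NOT
      // EXISTS into an already-existing partition (see TOK_INSERT above).
      throw new SemanticException("phase 1 failed");
    }
    getMetaData(getQB());                // resolve tables/views/CTEs
    Operator sinkOp = genPlan(getQB());  // generate the operator tree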
-
- /**
- * This is phase 1 of support for specifying a target schema in an insert
- * statement, e.g. insert into foo(z,y) select a,b from bar;
- * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
- * @throws SemanticException
- */
- private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
- ASTNode tabColName = (ASTNode)ast.getChild(1);
- if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
- //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
- List<String> targetColNames = new ArrayList<String>();
- for(Node col : tabColName.getChildren()) {
- assert ((ASTNode)col).getType() == HiveParser.Identifier :
- "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
- targetColNames.add(((ASTNode)col).getText());
- }
- String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
- SessionState.get().getCurrentDatabase());
- qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
- Set<String> targetColumns = new HashSet<String>();
- targetColumns.addAll(targetColNames);
- if(targetColNames.size() != targetColumns.size()) {
- throw new SemanticException(generateErrorMessage(tabColName,
- "Duplicate column name detected in " + fullTableName + " table schema specification"));
- }
- Table targetTable = null;
- try {
- targetTable = db.getTable(fullTableName, false);
- }
- catch (HiveException ex) {
- LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
- throw new SemanticException(ex);
- }
- if(targetTable == null) {
- throw new SemanticException(generateErrorMessage(ast,
- "Unable to access metadata for table " + fullTableName));
- }
- for(FieldSchema f : targetTable.getCols()) {
- //parser only allows foo(a,b), not foo(foo.a, foo.b)
- targetColumns.remove(f.getName());
- }
- if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns
- /* We just checked the user-specified schema columns against the regular
- table columns and found some that are not 'regular'. Now check whether
- they are dynamic partition columns.
- For dynamic partitioning,
- Given "create table multipart(a int, b int) partitioned by (c int, d int);"
- for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this
- (TOK_INSERT_INTO
- (TOK_TAB
- (TOK_TABNAME multipart)
- (TOK_PARTSPEC
- (TOK_PARTVAL c '1')
- (TOK_PARTVAL d)
- )
- )
- (TOK_TABCOLNAME d a)
- )*/
- List<String> dynamicPartitionColumns = new ArrayList<String>();
- if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
- ASTNode tokTab = (ASTNode)ast.getChild(0);
- ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
- if(tokPartSpec != null) {
- for(Node n : tokPartSpec.getChildren()) {
- ASTNode tokPartVal = null;
- if(n instanceof ASTNode) {
- tokPartVal = (ASTNode)n;
- }
- if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) {
- assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
- "Expected column name; found tokType=" + tokPartVal.getType();
- dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
- }
- }
- }
- }
- for(String colName : dynamicPartitionColumns) {
- targetColumns.remove(colName);
- }
- if(!targetColumns.isEmpty()) {
- // Found some columns in the user-specified schema which are neither regular nor dynamic partition columns
- throw new SemanticException(generateErrorMessage(tabColName,
- "'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
- "' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
- " not found among the regular columns of " +
- fullTableName + " or its dynamic partition columns."));
- }
- }
- }
- }
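
The duplicate-column guard above reduces to a set-cardinality check;
restated as a standalone sketch in plain Java (the class and column names
are hypothetical):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class DupColumnCheck {
      public static void main(String[] args) {
        // "insert into foo(z,y,z) ..." lists z twice; de-duplicating into
        // a set makes it smaller than the original list, flagging the error.
        List<String> targetColNames = Arrays.asList("z", "y", "z");
        Set<String> targetColumns = new HashSet<>(targetColNames);
        if (targetColNames.size() != targetColumns.size()) {
          throw new IllegalArgumentException(
              "Duplicate column name detected in table schema specification");
        }
      }
    }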
-
- public void getMaterializationMetadata(QB qb) throws SemanticException {
- try {
- gatherCTEReferences(qb, rootClause);
- int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
- for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
- if (threshold >= 0 && cte.reference >= threshold) {
- cte.materialize = true;
- }
- }
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- if (e instanceof SemanticException) {
- throw (SemanticException)e;
- }
- throw new SemanticException(e.getMessage(), e);
- }
- }
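
The materialization decision above is a pure reference count against
HIVE_CTE_MATERIALIZE_THRESHOLD: a negative threshold disables
materialization, otherwise a CTE referenced at least threshold times is
materialized. A minimal restatement of the rule (hypothetical class name
and values):

    public class CteMaterializeRule {
      // Mirrors the check above: threshold < 0 disables materialization;
      // otherwise materialize once the reference count reaches the threshold.
      static boolean shouldMaterialize(int threshold, int referenceCount) {
        return threshold >= 0 && referenceCount >= threshold;
      }

      public static void main(String[] args) {
        System.out.println(shouldMaterialize(2, 3));   // true
        System.out.println(shouldMaterialize(-1, 99)); // false: disabled
      }
    }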
-
- private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
- if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
- gatherCTEReferences(qbexpr.getQB(), parent);
- } else {
- gatherCTEReferences(qbexpr.getQBExpr1(), parent);
- gatherCTEReferences(qbexpr.getQBExpr2(), parent);
- }
- }
-
- // TODO: check view references, too
- private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
- for (String alias : qb.getTabAliases()) {
- String tabName = qb.getTabNameForAlias(alias);
- String cteName = tabName.toLowerCase();
-
- CTEClause cte = findCTEFromName(qb, cteName);
- if (cte != null) {
- if (ctesExpanded.contains(cteName)) {
- throw new SemanticException("Recursive cte " + cteName +
- " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
- " -> " + cteName + ").");
- }
- cte.reference++;
- current.parents.add(cte);
- if (cte.qbExpr != null) {
- continue;
- }
- cte.qbExpr = new QBExpr(cteName);
- doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
-
- ctesExpanded.add(cteName);
- gatherCTEReferences(cte.qbExpr, cte);
- ctesExpanded.remove(ctesExpanded.size() - 1);
- }
- }
- for (String alias : qb.getSubqAliases()) {
- gatherCTEReferences(qb.getSubqForAlias(alias), current);
- }
- }
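
The ctesExpanded list serves as the expansion stack for cycle detection; a
mutually recursive WITH clause (hypothetical query) would be rejected by
the check above with a message such as
"Recursive cte q1 detected (cycle: q1 -> q2 -> q1).":

    WITH q1 AS (SELECT * FROM q2),
         q2 AS (SELECT * FROM q1)
    SELECT * FROM q1;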
-
- public void getMetaData(QB qb) throws SemanticException {
- getMetaData(qb, false);
- }
-
- public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
- try {
- if (enableMaterialization) {
- getMaterializationMetadata(qb);
- }
- getMetaData(qb, null);
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- if (e instanceof SemanticException) {
- throw (SemanticException)e;
- }
- throw new SemanticException(e.getMessage(), e);
- }
- }
-
- private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
- throws HiveException {
- if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
- getMetaData(qbexpr.getQB(), parentInput);
- } else {
- getMetaData(qbexpr.getQBExpr1(), parentInput);
- getMetaData(qbexpr.getQBExpr2(), parentInput);
- }
- }
-
- @SuppressWarnings("nls")
- private void getMetaData(QB qb, ReadEntity parentInput)
- throws HiveException {
- LOG.info("Get metadata for source tables");
-
- // Go over the tables and populate the related structures.
- // We have to materialize the table alias list since we might
- // modify it in the middle for view rewrite.
- List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
-
- // Keep track of view alias to view name and read entity
- // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
- // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
- // This is needed for tracking the dependencies for inputs, along with their parents.
- Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
- new HashMap<String, ObjectPair<String, ReadEntity>>();
-
- /*
- * used to capture view to SQ conversions. This is used to check for
- * recursive CTE invocations.
- */
- Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
-
- for (String alias : tabAliases) {
- String tabName = qb.getTabNameForAlias(alias);
- String cteName = tabName.toLowerCase();
-
- Table tab = db.getTable(tabName, false);
- if (tab == null ||
- tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
- Table materializedTab = ctx.getMaterializedTable(cteName);
- if (materializedTab == null) {
- // We first look up this alias among the CTEs, and then in the catalog.
- CTEClause cte = findCTEFromName(qb, cteName);
- if (cte != null) {
- if (!cte.materialize) {
- addCTEAsSubQuery(qb, cteName, alias);
- sqAliasToCTEName.put(alias, cteName);
- continue;
- }
- tab = materializeCTE(cteName, cte);
- }
- } else {
- tab = materializedTab;
- }
- }
-
- if (tab == null) {
- ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
- if (null != src) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
- } else {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
- }
- }
- if (tab.isView()) {
- if (qb.getParseInfo().isAnalyzeCommand()) {
- throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
- }
- String fullViewName = tab.getDbName() + "." + tab.getTableName();
- // Prevent view cycles
- if (viewsExpanded.contains(fullViewName)) {
- throw new SemanticException("Recursive view " + fullViewName +
- " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
- " -> " + fullViewName + ").");
- }
- replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
- // This is the last time we'll see the Table objects for views, so add
- // them to the inputs now. isInsideView tells whether this view is
- // embedded in another view; if so, it should have at least one parent.
- if (qb.isInsideView() && parentInput == null) {
- parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
- }
- ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
- viewInput = PlanUtils.addInput(inputs, viewInput);
- aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
- String aliasId = getAliasId(alias, qb);
- if (aliasId != null) {
- aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
- .replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
- }
- viewAliasToInput.put(aliasId, viewInput);
- continue;
- }
-
- if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
- throw new SemanticException(generateErrorMessage(
- qb.getParseInfo().getSrcForAlias(alias),
- ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
- }
-
- qb.getMetaData().setSrcForAlias(alias, tab);
-
- if (qb.getParseInfo().isAnalyzeCommand()) {
- // allow partial partition specification for noscan, since noscan is fast.
- TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
- if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
- try {
- ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
- } catch (HiveException e) {
- throw new SemanticException(generateErrorMessage(
- qb.getParseInfo().getSrcForAlias(alias),
- "Cannot get partitions for " + ts.partSpec), e);
- }
- }
- // validate partial scan command
- QBParseInfo qbpi = qb.getParseInfo();
- if (qbpi.isPartialScanAnalyzeCommand()) {
- Class<? extends InputFormat> inputFormatClass = null;
- switch (ts.specType) {
- case TABLE_ONLY:
- case DYNAMIC_PARTITION:
- inputFormatClass = ts.tableHandle.getInputFormatClass();
- break;
- case STATIC_PARTITION:
- inputFormatClass = ts.partHandle.getInputFormatClass();
- break;
- default:
- assert false;
- }
- // throw a SemanticException for formats other than RCFile or ORC.
- if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
- .equals(OrcInputFormat.class))) {
- throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
- }
- }
-
- tab.setTableSpec(ts);
- qb.getParseInfo().addTableSpec(alias, ts);
- }
-
- ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
- // Temporary tables created during the execution are not the input sources
- if (!PlanUtils.isValuesTempTable(alias)) {
- PlanUtils.addInput(inputs,
- new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect);
- }
- }
-
- LOG.info("Get metadata for subqueries");
- // Go over the subqueries and getMetaData for these
- for (String alias : qb.getSubqAliases()) {
- boolean wasView = aliasToViewInfo.containsKey(alias);
- boolean wasCTE = sqAliasToCTEName.containsKey(alias);
- ReadEntity newParentInput = null;
- if (wasView) {
- viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
- newParentInput = aliasToViewInfo.get(alias).getSecond();
- } else if (wasCTE) {
- ctesExpanded.add(sqAliasToCTEName.get(alias));
-
<TRUNCATED>
[2/3] hive git commit: HIVE-16501 : Add rej/orig to .gitignore ;
remove *.orig files (Zoltan Haindrich via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/c911f420/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java.orig
deleted file mode 100644
index c97b3e7..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java.orig
+++ /dev/null
@@ -1,4188 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.parse;
-
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.UndeclaredThrowableException;
-import java.math.BigDecimal;
-import java.util.AbstractMap.SimpleEntry;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Deque;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.antlr.runtime.ClassicToken;
-import org.antlr.runtime.CommonToken;
-import org.antlr.runtime.tree.TreeVisitor;
-import org.antlr.runtime.tree.TreeVisitorAction;
-import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.adapter.druid.DruidRules;
-import org.apache.calcite.adapter.druid.DruidSchema;
-import org.apache.calcite.adapter.druid.DruidTable;
-import org.apache.calcite.adapter.druid.LocalInterval;
-import org.apache.calcite.config.CalciteConnectionConfigImpl;
-import org.apache.calcite.config.CalciteConnectionProperty;
-import org.apache.calcite.plan.RelOptCluster;
-import org.apache.calcite.plan.RelOptMaterialization;
-import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptSchema;
-import org.apache.calcite.plan.RelOptUtil;
-import org.apache.calcite.plan.RelTraitSet;
-import org.apache.calcite.plan.hep.HepMatchOrder;
-import org.apache.calcite.plan.hep.HepPlanner;
-import org.apache.calcite.plan.hep.HepProgram;
-import org.apache.calcite.plan.hep.HepProgramBuilder;
-import org.apache.calcite.rel.RelCollation;
-import org.apache.calcite.rel.RelCollationImpl;
-import org.apache.calcite.rel.RelCollations;
-import org.apache.calcite.rel.RelFieldCollation;
-import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.Aggregate;
-import org.apache.calcite.rel.core.AggregateCall;
-import org.apache.calcite.rel.core.Filter;
-import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SetOp;
-import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
-import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
-import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
-import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
-import org.apache.calcite.rel.metadata.RelMetadataProvider;
-import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import org.apache.calcite.rel.rules.FilterMergeRule;
-import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
-import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
-import org.apache.calcite.rel.rules.ProjectMergeRule;
-import org.apache.calcite.rel.rules.ProjectRemoveRule;
-import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule;
-import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule;
-import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule;
-import org.apache.calcite.rel.rules.UnionMergeRule;
-import org.apache.calcite.rel.type.RelDataType;
-import org.apache.calcite.rel.type.RelDataTypeFactory;
-import org.apache.calcite.rel.type.RelDataTypeField;
-import org.apache.calcite.rel.type.RelDataTypeImpl;
-import org.apache.calcite.rex.RexBuilder;
-import org.apache.calcite.rex.RexExecutor;
-import org.apache.calcite.rex.RexFieldCollation;
-import org.apache.calcite.rex.RexInputRef;
-import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexUtil;
-import org.apache.calcite.rex.RexWindowBound;
-import org.apache.calcite.schema.SchemaPlus;
-import org.apache.calcite.sql.SqlAggFunction;
-import org.apache.calcite.sql.SqlCall;
-import org.apache.calcite.sql.SqlExplainLevel;
-import org.apache.calcite.sql.SqlKind;
-import org.apache.calcite.sql.SqlLiteral;
-import org.apache.calcite.sql.SqlNode;
-import org.apache.calcite.sql.SqlOperator;
-import org.apache.calcite.sql.SqlWindow;
-import org.apache.calcite.sql.parser.SqlParserPos;
-import org.apache.calcite.sql.type.SqlTypeName;
-import org.apache.calcite.tools.Frameworks;
-import org.apache.calcite.util.CompositeList;
-import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.ImmutableIntList;
-import org.apache.calcite.util.Pair;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.ObjectPair;
-import org.apache.hadoop.hive.conf.Constants;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.QueryProperties;
-import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
-import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
-import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectMergeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectRewriteRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectFilterPullUpConstantsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectOverIntersectRemoveRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewFilterScanRule;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.PlanModifierForReturnPath;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
-import org.apache.hadoop.hive.ql.parse.QBExpr.Opcode;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.HiveOperation;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-
-import com.google.common.base.Function;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableList.Builder;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Multimap;
-import org.apache.calcite.config.CalciteConnectionConfig;
-
-public class CalcitePlanner extends SemanticAnalyzer {
-
- private final AtomicInteger noColsMissingStats = new AtomicInteger(0);
- private SemanticException semanticException;
- private boolean runCBO = true;
- private boolean disableSemJoinReordering = true;
- private EnumSet<ExtendedCBOProfile> profilesCBO;
-
- public CalcitePlanner(QueryState queryState) throws SemanticException {
- super(queryState);
- if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
- runCBO = false;
- disableSemJoinReordering = false;
- }
- }
-
- public void resetCalciteConfiguration() {
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
- runCBO = true;
- disableSemJoinReordering = true;
- }
- }
-
- @Override
- @SuppressWarnings("nls")
- public void analyzeInternal(ASTNode ast) throws SemanticException {
- if (runCBO) {
- PreCboCtx cboCtx = new PreCboCtx();
- super.analyzeInternal(ast, cboCtx);
- } else {
- super.analyzeInternal(ast);
- }
- }
-
- /**
- * This method is useful if we want to obtain the logical plan after the
- * query has been parsed and optimized by Calcite.
- *
- * @return the Calcite plan for the query, null if it could not be generated
- */
- public RelNode genLogicalPlan(ASTNode ast) throws SemanticException {
- LOG.info("Starting generating logical plan");
- PreCboCtx cboCtx = new PreCboCtx();
- // change the location of the position alias process here
- processPositionAlias(ast);
- if (!genResolvedParseTree(ast, cboCtx)) {
- return null;
- }
- ASTNode queryForCbo = ast;
- if (cboCtx.type == PreCboCtx.Type.CTAS || cboCtx.type == PreCboCtx.Type.VIEW) {
- queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
- }
- runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
- if (!runCBO) {
- return null;
- }
- profilesCBO = obtainCBOProfiles(queryProperties);
- disableJoinMerge = true;
- final RelNode resPlan = logicalPlan();
- LOG.info("Finished generating logical plan");
- return resPlan;
- }
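
A sketch of how a caller might obtain the Calcite plan through this entry
point, following the initialization pattern used by materializeCTE further
below (queryState, ctx, and ast are assumed to be in scope):

    CalcitePlanner planner = new CalcitePlanner(queryState);
    planner.initCtx(ctx);
    planner.init(false);
    RelNode plan = planner.genLogicalPlan(ast);
    if (plan == null) {
      // CBO could not handle this query; fall back to the non-CBO path.
    }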
-
- @Override
- @SuppressWarnings("rawtypes")
- Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
- Operator sinkOp = null;
- boolean skipCalcitePlan = false;
-
- if (!runCBO) {
- skipCalcitePlan = true;
- } else {
- PreCboCtx cboCtx = (PreCboCtx) plannerCtx;
-
- // Note: for now, we don't actually pass the queryForCbo to CBO, because
- // it accepts qb, not AST, and can also access all the private stuff in
- // SA. We rely on the fact that CBO ignores the unknown tokens (create
- // table, destination), so if the query is otherwise ok, it is as if we
- // did remove those and gave CBO the proper AST. That is somewhat hacky.
- ASTNode queryForCbo = ast;
- if (cboCtx.type == PreCboCtx.Type.CTAS || cboCtx.type == PreCboCtx.Type.VIEW) {
- queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
- }
- runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
- if (queryProperties.hasMultiDestQuery()) {
- handleMultiDestQuery(ast, cboCtx);
- }
-
- if (runCBO) {
- profilesCBO = obtainCBOProfiles(queryProperties);
-
- disableJoinMerge = true;
- boolean reAnalyzeAST = false;
- final boolean materializedView = getQB().isMaterializedView();
-
- try {
- if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
- if (cboCtx.type == PreCboCtx.Type.VIEW && !materializedView) {
- throw new SemanticException("Create view is not supported in cbo return path.");
- }
- sinkOp = getOptimizedHiveOPDag();
- LOG.info("CBO Succeeded; optimized logical plan.");
- this.ctx.setCboInfo("Plan optimized by CBO.");
- this.ctx.setCboSucceeded(true);
- } else {
- // 1. Gen Optimized AST
- ASTNode newAST = getOptimizedAST();
-
- // 1.1. Fix up the query for insert/ctas/materialized views
- newAST = fixUpAfterCbo(ast, newAST, cboCtx);
-
- // 2. Regen OP plan from optimized AST
- if (cboCtx.type == PreCboCtx.Type.VIEW && !materializedView) {
- try {
- handleCreateViewDDL(newAST);
- } catch (SemanticException e) {
- throw new CalciteViewSemanticException(e.getMessage());
- }
- } else {
- init(false);
- if (cboCtx.type == PreCboCtx.Type.VIEW && materializedView) {
- // Redo create-table/view analysis, because it's not part of
- // doPhase1.
- // Use the REWRITTEN AST
- setAST(newAST);
- newAST = reAnalyzeViewAfterCbo(newAST);
- // Store text of the ORIGINAL QUERY
- String originalText = ctx.getTokenRewriteStream().toString(
- cboCtx.nodeOfInterest.getTokenStartIndex(),
- cboCtx.nodeOfInterest.getTokenStopIndex());
- createVwDesc.setViewOriginalText(originalText);
- viewSelect = newAST;
- viewsExpanded = new ArrayList<>();
- viewsExpanded.add(createVwDesc.getViewName());
- } else if (cboCtx.type == PreCboCtx.Type.CTAS) {
- // CTAS
- setAST(newAST);
- newAST = reAnalyzeCTASAfterCbo(newAST);
- }
- }
- Phase1Ctx ctx_1 = initPhase1Ctx();
- if (!doPhase1(newAST, getQB(), ctx_1, null)) {
- throw new RuntimeException("Couldn't do phase1 on CBO optimized query plan");
- }
- // Unfortunately, making prunedPartitions immutable is not possible
- // here: with SemiJoins, not all tables are costed in CBO, so their
- // PartitionList is not evaluated until the run phase.
- getMetaData(getQB());
-
- disableJoinMerge = defaultJoinMerge;
- sinkOp = genPlan(getQB());
- LOG.info("CBO Succeeded; optimized logical plan.");
- this.ctx.setCboInfo("Plan optimized by CBO.");
- this.ctx.setCboSucceeded(true);
- if (LOG.isTraceEnabled()) {
- LOG.trace(newAST.dump());
- }
- }
- } catch (Exception e) {
- boolean isMissingStats = noColsMissingStats.get() > 0;
- if (isMissingStats) {
- LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO");
- this.ctx.setCboInfo("Plan not optimized by CBO due to missing statistics."
- + " Please check log for more details.");
- } else {
- LOG.error("CBO failed, skipping CBO. ", e);
- if (e instanceof CalciteSemanticException) {
- CalciteSemanticException calciteSemanticException = (CalciteSemanticException) e;
- UnsupportedFeature unsupportedFeature = calciteSemanticException
- .getUnsupportedFeature();
- if (unsupportedFeature != null) {
- this.ctx.setCboInfo("Plan not optimized by CBO due to missing feature ["
- + unsupportedFeature + "].");
- } else {
- this.ctx.setCboInfo("Plan not optimized by CBO.");
- }
- } else {
- this.ctx.setCboInfo("Plan not optimized by CBO.");
- }
- }
- if (e instanceof CalciteSubquerySemanticException) {
- // The non-CBO path re-executes subqueries and throws a completely
- // different exception/error that eclipses the original error message,
- // so avoid executing subqueries on the non-CBO path.
- throw new SemanticException(e);
- } else if (e instanceof CalciteViewSemanticException) {
- // The non-CBO path retries the create view and, we believe, will
- // throw the same error message.
- throw new SemanticException(e);
- }
- else if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || isMissingStats
- || e instanceof CalciteSemanticException ) {
- reAnalyzeAST = true;
- } else if (e instanceof SemanticException) {
- // although it's likely to be a valid exception, we will retry
- // with CBO off anyway.
- reAnalyzeAST = true;
- } else if (e instanceof RuntimeException) {
- throw (RuntimeException) e;
- } else {
- throw new SemanticException(e);
- }
- } finally {
- runCBO = false;
- disableJoinMerge = defaultJoinMerge;
- disableSemJoinReordering = false;
- if (reAnalyzeAST) {
- init(true);
- prunedPartitions.clear();
- // Assumption: At this point Parse Tree gen & resolution will always
- // be true (since we started out that way).
- super.genResolvedParseTree(ast, new PlannerContext());
- skipCalcitePlan = true;
- }
- }
- } else {
- this.ctx.setCboInfo("Plan not optimized by CBO.");
- skipCalcitePlan = true;
- }
- }
-
- if (skipCalcitePlan) {
- sinkOp = super.genOPTree(ast, plannerCtx);
- }
-
- return sinkOp;
- }
-
- private void handleCreateViewDDL(ASTNode newAST) throws SemanticException {
- saveViewDefinition();
- String originalText = createVwDesc.getViewOriginalText();
- String expandedText = createVwDesc.getViewExpandedText();
- List<FieldSchema> schema = createVwDesc.getSchema();
- List<FieldSchema> partitionColumns = createVwDesc.getPartCols();
- init(false);
- setAST(newAST);
- newAST = reAnalyzeViewAfterCbo(newAST);
- createVwDesc.setViewOriginalText(originalText);
- createVwDesc.setViewExpandedText(expandedText);
- createVwDesc.setSchema(schema);
- createVwDesc.setPartCols(partitionColumns);
- }
-
- /*
- * Tries to optimize the FROM clause of a multi-insert query; no attempt is
- * made to optimize the insert clauses. If the rewrite is not possible,
- * disables CBO by setting runCBO to false.
- */
- private void handleMultiDestQuery(ASTNode ast, PreCboCtx cboCtx) throws SemanticException {
- // Not supported by CBO
- if (!runCBO) {
- return;
- }
- // Currently, we only optimize the content of the FROM clause for
- // multi-insert queries. Thus, nodeOfInterest is the FROM clause.
- if (isJoinToken(cboCtx.nodeOfInterest)) {
- // Join clause: rewriting is needed
- ASTNode subq = rewriteASTForMultiInsert(ast, cboCtx.nodeOfInterest);
- if (subq != null) {
- // We could rewrite into a subquery
- cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
- QB newQB = new QB(null, "", false);
- Phase1Ctx ctx_1 = initPhase1Ctx();
- doPhase1(cboCtx.nodeOfInterest, newQB, ctx_1, null);
- setQB(newQB);
- getMetaData(getQB());
- } else {
- runCBO = false;
- }
- } else if (cboCtx.nodeOfInterest.getToken().getType() == HiveParser.TOK_SUBQUERY) {
- // Subquery: no rewriting needed
- ASTNode subq = cboCtx.nodeOfInterest;
- // First child is subquery, second child is alias
- // We set the node of interest and QB to the subquery
- // We do not need to generate the QB again, but rather we use it directly
- cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
- String subQAlias = unescapeIdentifier(subq.getChild(1).getText());
- final QB newQB = getQB().getSubqForAlias(subQAlias).getQB();
- newQB.getParseInfo().setAlias("");
- newQB.getParseInfo().setIsSubQ(false);
- setQB(newQB);
- } else {
- // No need to run CBO (table ref or virtual table), or not supported
- runCBO = false;
- }
- }
-
- private ASTNode rewriteASTForMultiInsert(ASTNode query, ASTNode nodeOfInterest) {
- // 1. gather references from original query
- // This is a map from aliases to references.
- // We keep all references as we will need to modify them after creating
- // the subquery
- final Multimap<String, Object> aliasNodes = ArrayListMultimap.create();
- // To know if we need to bail out
- final AtomicBoolean notSupported = new AtomicBoolean(false);
- TreeVisitorAction action = new TreeVisitorAction() {
- @Override
- public Object pre(Object t) {
- if (!notSupported.get()) {
- if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_ALLCOLREF) {
- // TODO: this is a limitation of the AST rewriting approach that we will
- // not be able to overcome till proper integration of full multi-insert
- // queries with Calcite is implemented.
- // The current rewriting gathers references from insert clauses and then
- // updates them with the new subquery references. However, if insert
- // clauses use * or tab.*, we cannot resolve the columns that we are
- // referring to. Thus, we just bail out and those queries will not be
- // currently optimized by Calcite.
- // An example of such query is:
- // FROM T_A a LEFT JOIN T_B b ON a.id = b.id
- // INSERT OVERWRITE TABLE join_result_1
- // SELECT a.*, b.*
- // INSERT OVERWRITE TABLE join_result_3
- // SELECT a.*, b.*;
- notSupported.set(true);
- } else if (ParseDriver.adaptor.getType(t) == HiveParser.DOT) {
- Object c = ParseDriver.adaptor.getChild(t, 0);
- if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.TOK_TABLE_OR_COL) {
- aliasNodes.put(((ASTNode) t).toStringTree(), t);
- }
- } else if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) {
- Object p = ParseDriver.adaptor.getParent(t);
- if (p == null || ParseDriver.adaptor.getType(p) != HiveParser.DOT) {
- aliasNodes.put(((ASTNode) t).toStringTree(), t);
- }
- }
- }
- return t;
- }
- @Override
- public Object post(Object t) {
- return t;
- }
- };
- TreeVisitor tv = new TreeVisitor(ParseDriver.adaptor);
- // We will iterate through the children: if it is an INSERT, we will traverse
- // the subtree to gather the references
- for (int i = 0; i < query.getChildCount(); i++) {
- ASTNode child = (ASTNode) query.getChild(i);
- if (ParseDriver.adaptor.getType(child) != HiveParser.TOK_INSERT) {
- // If it is not an INSERT, we do not need to do anything
- continue;
- }
- tv.visit(child, action);
- }
- if (notSupported.get()) {
- // Bail out
- return null;
- }
- // 2. rewrite into query
- // TOK_QUERY
- // TOK_FROM
- // join
- // TOK_INSERT
- // TOK_DESTINATION
- // TOK_DIR
- // TOK_TMP_FILE
- // TOK_SELECT
- // refs
- ASTNode from = new ASTNode(new CommonToken(HiveParser.TOK_FROM, "TOK_FROM"));
- from.addChild((ASTNode) ParseDriver.adaptor.dupTree(nodeOfInterest));
- ASTNode destination = new ASTNode(new CommonToken(HiveParser.TOK_DESTINATION, "TOK_DESTINATION"));
- ASTNode dir = new ASTNode(new CommonToken(HiveParser.TOK_DIR, "TOK_DIR"));
- ASTNode tmpFile = new ASTNode(new CommonToken(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE"));
- dir.addChild(tmpFile);
- destination.addChild(dir);
- ASTNode select = new ASTNode(new CommonToken(HiveParser.TOK_SELECT, "TOK_SELECT"));
- int num = 0;
- for (Collection<Object> selectIdentifier : aliasNodes.asMap().values()) {
- Iterator<Object> it = selectIdentifier.iterator();
- ASTNode node = (ASTNode) it.next();
- // Add select expression
- ASTNode selectExpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
- selectExpr.addChild((ASTNode) ParseDriver.adaptor.dupTree(node)); // Identifier
- String colAlias = "col" + num;
- selectExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); // Alias
- select.addChild(selectExpr);
- // Rewrite all INSERT references (all the node values for this key)
- ASTNode colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
- colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
- replaceASTChild(node, colExpr);
- while (it.hasNext()) {
- // Loop to rewrite rest of INSERT references
- node = (ASTNode) it.next();
- colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
- colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
- replaceASTChild(node, colExpr);
- }
- num++;
- }
- ASTNode insert = new ASTNode(new CommonToken(HiveParser.TOK_INSERT, "TOK_INSERT"));
- insert.addChild(destination);
- insert.addChild(select);
- ASTNode newQuery = new ASTNode(new CommonToken(HiveParser.TOK_QUERY, "TOK_QUERY"));
- newQuery.addChild(from);
- newQuery.addChild(insert);
- // 3. create subquery
- ASTNode subq = new ASTNode(new CommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY"));
- subq.addChild(newQuery);
- subq.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, "subq")));
- replaceASTChild(nodeOfInterest, subq);
- // 4. return subquery
- return subq;
- }
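
At the SQL level, the rewrite above has the following effect (hypothetical
tables and destinations; the generated column aliases col0, col1, ... and
the subquery alias subq match the code):

    FROM T_A a JOIN T_B b ON a.id = b.id
    INSERT OVERWRITE TABLE r1 SELECT a.id
    INSERT OVERWRITE TABLE r2 SELECT b.id

becomes

    FROM (SELECT a.id AS col0, b.id AS col1
          FROM T_A a JOIN T_B b ON a.id = b.id) subq
    INSERT OVERWRITE TABLE r1 SELECT col0
    INSERT OVERWRITE TABLE r2 SELECT col1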
-
- /**
- * Can CBO handle the given AST?
- *
- * @param ast
- * Top level AST
- * @param qb
- * top-level QB corresponding to the AST
- * @param cboCtx
- * @return boolean
- *
- * Assumption:<br>
- * If the top-level QB is a query, then everything below it must also be
- * a query.
- */
- boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
- int root = ast.getToken().getType();
- boolean needToLogMessage = STATIC_LOG.isInfoEnabled();
- boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN
- || qb.isCTAS() || qb.isMaterializedView();
- // Queries without a source table currently are not supported by CBO
- boolean isSupportedType = (qb.getIsQuery() && !qb.containsQueryWithoutSourceTable())
- || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT
- || cboCtx.type == PreCboCtx.Type.MULTI_INSERT;
- boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast);
- boolean result = isSupportedRoot && isSupportedType && noBadTokens;
-
- if (!result) {
- if (needToLogMessage) {
- String msg = "";
- if (!isSupportedRoot) {
- msg += "doesn't have QUERY or EXPLAIN as root and is not a CTAS; ";
- }
- if (!isSupportedType) {
- msg += "is not a query with at least one source table, "
- + "or there is a subquery without a source table, or CTAS, or insert; ";
- }
- if (!noBadTokens) {
- msg += "has unsupported tokens; ";
- }
-
- if (msg.isEmpty()) {
- msg += "has some unspecified limitations; ";
- }
- STATIC_LOG.info("Not invoking CBO because the statement "
- + msg.substring(0, msg.length() - 2));
- }
- return false;
- }
- // Now check QB in more detail. canHandleQbForCbo returns null if query can
- // be handled.
- String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb);
- if (msg == null) {
- return true;
- }
- if (needToLogMessage) {
- STATIC_LOG.info("Not invoking CBO because the statement "
- + msg.substring(0, msg.length() - 2));
- }
- return false;
- }
-
- /**
- * Checks whether Calcite can handle the query.
- *
- * @param queryProperties
- * properties of the query to check
- * @param conf
- * Hive configuration
- * @param topLevelQB
- * Does the QB correspond to the top-most query block?
- * @param verbose
- * Whether the return value should be verbose in case of failure.
- * @param qb
- * query block to check
- * @return null if the query can be handled; non-null reason string if it
- * cannot be.
- *
- * Assumption:<br>
- * 1. If the top-level QB is a query then everything below it must also be
- * a query<br>
- * 2. A nested subquery will return false for qb.getIsQuery()
- */
- static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
- boolean topLevelQB, boolean verbose, QB qb) {
-
- if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
- && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
- && !queryProperties.hasLateralViews()) {
- // Ok to run CBO.
- return null;
- }
-
- // Not ok to run CBO, build error message.
- String msg = "";
- if (verbose) {
- if (queryProperties.hasClusterBy())
- msg += "has cluster by; ";
- if (queryProperties.hasDistributeBy())
- msg += "has distribute by; ";
- if (queryProperties.hasSortBy())
- msg += "has sort by; ";
- if (queryProperties.hasPTF())
- msg += "has PTF; ";
- if (queryProperties.usesScript())
- msg += "uses scripts; ";
- if (queryProperties.hasLateralViews())
- msg += "has lateral views; ";
-
- if (msg.isEmpty())
- msg += "has some unspecified limitations; ";
- }
- return msg;
- }
-
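- // Usage sketch (mirrors the call in canCBOHandleAst above; illustrative only):
- // String reason = canHandleQbForCbo(queryProperties, conf, true, true, qb);
- // if (reason != null) {
- // STATIC_LOG.info("Not invoking CBO because the statement "
- // + reason.substring(0, reason.length() - 2));
- // }
-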
- /* This method adds the appropriate profiles to the CBO profile set depending
- * on the query characteristics. */
- private static EnumSet<ExtendedCBOProfile> obtainCBOProfiles(QueryProperties queryProperties) {
- EnumSet<ExtendedCBOProfile> profilesCBO = EnumSet.noneOf(ExtendedCBOProfile.class);
- // If the query contains more than one join
- if (queryProperties.getJoinCount() > 1) {
- profilesCBO.add(ExtendedCBOProfile.JOIN_REORDERING);
- }
- // If the query contains windowing processing
- if (queryProperties.hasWindowing()) {
- profilesCBO.add(ExtendedCBOProfile.WINDOWING_POSTPROCESSING);
- }
- return profilesCBO;
- }
-
- @Override
- boolean isCBOExecuted() {
- return runCBO;
- }
-
- @Override
- boolean continueJoinMerge() {
- return !(runCBO && disableSemJoinReordering);
- }
-
- @Override
- Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
-
- ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
-
- ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
- tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
-
- ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
-
- createTable.addChild(tableName);
- createTable.addChild(temporary);
- createTable.addChild(cte.cteNode);
-
- CalcitePlanner analyzer = new CalcitePlanner(queryState);
- analyzer.initCtx(ctx);
- analyzer.init(false);
-
- // should share cte contexts
- analyzer.aliasToCTEs.putAll(aliasToCTEs);
-
- HiveOperation operation = queryState.getHiveOperation();
- try {
- analyzer.analyzeInternal(createTable);
- } finally {
- queryState.setCommandType(operation);
- }
-
- Table table = analyzer.tableDesc.toTable(conf);
- Path location = table.getDataLocation();
- try {
- location.getFileSystem(conf).mkdirs(location);
- } catch (IOException e) {
- throw new HiveException(e);
- }
- table.setMaterializedTable(true);
-
- LOG.info(cteName + " will be materialized into " + location);
- cte.table = table;
- cte.source = analyzer;
-
- ctx.addMaterializedTable(cteName, table);
- // For CalcitePlanner, store qualified name too
- ctx.addMaterializedTable(table.getDbName() + "." + table.getTableName(), table);
-
- return table;
- }
-
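- // Editor's note (illustrative): for a query such as
- // WITH q1 AS (SELECT key FROM src) SELECT * FROM q1
- // materializeCTE above synthesizes the AST of a temporary-table CTAS for q1
- // (the KW_TEMPORARY token carrying MATERIALIZATION_MARKER), analyzes it with
- // a fresh CalcitePlanner, and eagerly creates the table's data location so
- // the CTE can be written once and then scanned by the outer query.
-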
- @Override
- String fixCtasColumnName(String colName) {
- if (runCBO) {
- int lastDot = colName.lastIndexOf('.');
- if (lastDot < 0)
- return colName; // alias is not fully qualified
- String nqColumnName = colName.substring(lastDot + 1);
- STATIC_LOG.debug("Replacing " + colName + " (produced by CBO) by " + nqColumnName);
- return nqColumnName;
- }
-
- return super.fixCtasColumnName(colName);
- }
-
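- // Example (illustrative): with CBO enabled, fixCtasColumnName("subq.col0")
- // returns "col0", so CTAS column names never keep a planner-generated qualifier.
-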
- /**
- * The context that doPhase1 uses to populate information pertaining to CBO
- * (currently, this is used for CTAS and insert-as-select).
- */
- static class PreCboCtx extends PlannerContext {
- enum Type {
- NONE, INSERT, MULTI_INSERT, CTAS, VIEW, UNEXPECTED
- }
-
- private ASTNode nodeOfInterest;
- private Type type = Type.NONE;
-
- private void set(Type type, ASTNode ast) {
- if (this.type != Type.NONE) {
- STATIC_LOG.warn("Setting " + type + " when already " + this.type + "; node " + ast.dump()
- + " vs old node " + nodeOfInterest.dump());
- this.type = Type.UNEXPECTED;
- return;
- }
- this.type = type;
- this.nodeOfInterest = ast;
- }
-
- @Override
- void setCTASToken(ASTNode child) {
- set(PreCboCtx.Type.CTAS, child);
- }
-
- @Override
- void setViewToken(ASTNode child) {
- set(PreCboCtx.Type.VIEW, child);
- }
-
- @Override
- void setInsertToken(ASTNode ast, boolean isTmpFileDest) {
- if (!isTmpFileDest) {
- set(PreCboCtx.Type.INSERT, ast);
- }
- }
-
- @Override
- void setMultiInsertToken(ASTNode child) {
- set(PreCboCtx.Type.MULTI_INSERT, child);
- }
-
- @Override
- void resetToken() {
- this.type = Type.NONE;
- this.nodeOfInterest = null;
- }
- }
-
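- // Illustrative life cycle of PreCboCtx (editor's sketch, not original code):
- // PreCboCtx cboCtx = new PreCboCtx();
- // cboCtx.setCTASToken(queryNode); // doPhase1 marks the node of interest
- // ... CBO runs and produces newAst ...
- // ast = fixUpAfterCbo(ast, newAst, cboCtx); // patches the result back in
-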
- ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx)
- throws SemanticException {
- switch (cboCtx.type) {
-
- case NONE:
- // nothing to do
- return newAst;
-
- case CTAS:
- case VIEW: {
- // Patch the optimized query back into original CTAS AST, replacing the
- // original query.
- replaceASTChild(cboCtx.nodeOfInterest, newAst);
- return originalAst;
- }
-
- case INSERT: {
- // We need to patch the original destination back into the new query.
- // This makes assumptions about the structure of the AST.
- ASTNode newDest = new ASTSearcher().simpleBreadthFirstSearch(newAst, HiveParser.TOK_QUERY,
- HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION);
- if (newDest == null) {
- LOG.error("Cannot find destination after CBO; new ast is " + newAst.dump());
- throw new SemanticException("Cannot find destination after CBO");
- }
- replaceASTChild(newDest, cboCtx.nodeOfInterest);
- return newAst;
- }
-
- case MULTI_INSERT: {
- // Patch the optimized query back into original FROM clause.
- replaceASTChild(cboCtx.nodeOfInterest, newAst);
- return originalAst;
- }
-
- default:
- throw new AssertionError("Unexpected type " + cboCtx.type);
- }
- }
-
- ASTNode reAnalyzeCTASAfterCbo(ASTNode newAst) throws SemanticException {
- // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it
- // here.
- newAst = analyzeCreateTable(newAst, getQB(), null);
- if (newAst == null) {
- LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" + " new ast is "
- + getAST().dump());
- throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO");
- }
- return newAst;
- }
-
- ASTNode reAnalyzeViewAfterCbo(ASTNode newAst) throws SemanticException {
- // analyzeCreateView uses this.ast, but doPhase1 doesn't, so only reset it
- // here.
- newAst = analyzeCreateView(newAst, getQB(), null);
- if (newAst == null) {
- LOG.error("analyzeCreateTable failed to initialize materialized view after CBO;" + " new ast is "
- + getAST().dump());
- throw new SemanticException("analyzeCreateTable failed to initialize materialized view after CBO");
- }
- return newAst;
- }
-
-
- public static class ASTSearcher {
- private final LinkedList<ASTNode> searchQueue = new LinkedList<ASTNode>();
-
- /**
- * Performs a breadth-first search of the AST for a nested set of tokens. Tokens
- * don't have to be each other's direct children; they can be separated by
- * layers of other tokens. For each token in the list, the first one found is
- * matched and there's no backtracking; thus, if the AST has multiple instances
- * of some token, of which only one matches, it is not guaranteed to be found.
- * We use this for simple things. Not thread-safe - reuses searchQueue.
- */
- public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) {
- searchQueue.clear();
- searchQueue.add(ast);
- for (int i = 0; i < tokens.length; ++i) {
- boolean found = false;
- int token = tokens[i];
- while (!searchQueue.isEmpty() && !found) {
- ASTNode next = searchQueue.poll();
- found = next.getType() == token;
- if (found) {
- if (i == tokens.length - 1)
- return next;
- searchQueue.clear();
- }
- for (int j = 0; j < next.getChildCount(); ++j) {
- searchQueue.add((ASTNode) next.getChild(j));
- }
- }
- if (!found)
- return null;
- }
- return null;
- }
-
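- // Editor's note: despite its name, the method below reuses the shared queue
- // in FIFO order, so it actually visits nodes breadth-first.
-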
- public ASTNode depthFirstSearch(ASTNode ast, int token) {
- searchQueue.clear();
- searchQueue.add(ast);
- while (!searchQueue.isEmpty()) {
- ASTNode next = searchQueue.poll();
- if (next.getType() == token) return next;
- for (int j = 0; j < next.getChildCount(); ++j) {
- searchQueue.add((ASTNode) next.getChild(j));
- }
- }
- return null;
- }
-
- public ASTNode simpleBreadthFirstSearchAny(ASTNode ast, int... tokens) {
- searchQueue.clear();
- searchQueue.add(ast);
- while (!searchQueue.isEmpty()) {
- ASTNode next = searchQueue.poll();
- for (int i = 0; i < tokens.length; ++i) {
- if (next.getType() == tokens[i]) return next;
- }
- for (int i = 0; i < next.getChildCount(); ++i) {
- searchQueue.add((ASTNode) next.getChild(i));
- }
- }
- return null;
- }
-
- public void reset() {
- searchQueue.clear();
- }
- }
-
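- // Usage sketch (same shape as the lookup in fixUpAfterCbo above):
- // ASTNode dest = new ASTSearcher().simpleBreadthFirstSearch(
- // ast, HiveParser.TOK_QUERY, HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION);
- // (dest is null when no nested QUERY -> INSERT -> DESTINATION chain exists.)
-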
- private static void replaceASTChild(ASTNode child, ASTNode newChild) {
- ASTNode parent = (ASTNode) child.parent;
- int childIndex = child.childIndex;
- parent.deleteChild(childIndex);
- parent.insertChild(childIndex, newChild);
- }
-
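- // Usage (as in fixUpAfterCbo above): replaceASTChild(cboCtx.nodeOfInterest, newAst)
- // swaps the new subtree into the parent at the same child index.
-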
- /**
- * Get the optimized logical plan for the given QB tree in the semAnalyzer.
- *
- * @return the optimized plan as a Calcite RelNode
- * @throws SemanticException
- */
- RelNode logicalPlan() throws SemanticException {
- RelNode optimizedOptiqPlan = null;
-
- CalcitePlannerAction calcitePlannerAction = null;
- if (this.columnAccessInfo == null) {
- this.columnAccessInfo = new ColumnAccessInfo();
- }
- calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, this.columnAccessInfo);
-
- try {
- optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
- .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
- } catch (Exception e) {
- rethrowCalciteException(e);
- throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage());
- }
- return optimizedOptiqPlan;
- }
-
- /**
- * Get the optimized AST for the given QB tree in the semAnalyzer.
- *
- * @return Optimized operator tree translated into a Hive AST
- * @throws SemanticException
- */
- ASTNode getOptimizedAST() throws SemanticException {
- RelNode optimizedOptiqPlan = logicalPlan();
- ASTNode optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, resultSchema,
- HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_COLUMN_ALIGNMENT));
- return optiqOptimizedAST;
- }
-
- /**
- * Get Optimized Hive Operator DAG for the given QB tree in the semAnalyzer.
- *
- * @return Optimized Hive operator tree
- * @throws SemanticException
- */
- Operator getOptimizedHiveOPDag() throws SemanticException {
- RelNode optimizedOptiqPlan = null;
- CalcitePlannerAction calcitePlannerAction = null;
- if (this.columnAccessInfo == null) {
- this.columnAccessInfo = new ColumnAccessInfo();
- }
- calcitePlannerAction = new CalcitePlannerAction(prunedPartitions, this.columnAccessInfo);
-
- try {
- optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
- .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
- } catch (Exception e) {
- rethrowCalciteException(e);
- throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage());
- }
-
- RelNode modifiedOptimizedOptiqPlan = PlanModifierForReturnPath.convertOpTree(
- optimizedOptiqPlan, resultSchema, this.getQB().getTableDesc() != null);
-
- LOG.debug("Translating the following plan:\n" + RelOptUtil.toString(modifiedOptimizedOptiqPlan));
- Operator<?> hiveRoot = new HiveOpConverter(this, conf, unparseTranslator, topOps)
- .convert(modifiedOptimizedOptiqPlan);
- RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
- opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
- String dest = getQB().getParseInfo().getClauseNames().iterator().next();
- if (getQB().getParseInfo().getDestSchemaForClause(dest) != null
- && this.getQB().getTableDesc() == null) {
- Operator<?> selOp = handleInsertStatement(dest, hiveRoot, hiveRootRR, getQB());
- return genFileSinkPlan(dest, getQB(), selOp);
- } else {
- return genFileSinkPlan(dest, getQB(), hiveRoot);
- }
- }
-
- // This function serves as a wrapper around handleInsertStatementSpec in
- // SemanticAnalyzer
- Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb)
- throws SemanticException {
- ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
- ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
- for (int i = 0; i < columns.size(); i++) {
- ColumnInfo col = columns.get(i);
- colList.add(new ExprNodeColumnDesc(col));
- }
- ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
-
- RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb,
- selExprList);
-
- ArrayList<String> columnNames = new ArrayList<String>();
- Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
- for (int i = 0; i < colList.size(); i++) {
- String outputCol = getColumnInternalName(i);
- colExprMap.put(outputCol, colList.get(i));
- columnNames.add(outputCol);
- }
- Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList,
- columnNames), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
- output.setColumnExprMap(colExprMap);
- return output;
- }
-
- /**
- * Unwraps Calcite invocation exceptions coming from the metadata provider
- * chain and obtains the real cause.
- *
- * @param e exception to unwrap
- */
- private void rethrowCalciteException(Exception e) throws SemanticException {
- Throwable first = (semanticException != null) ? semanticException : e, current = first, cause = current
- .getCause();
- while (cause != null) {
- Throwable causeOfCause = cause.getCause();
- if (current == first && causeOfCause == null && isUselessCause(first)) {
- // "cause" is a root cause, and "e"/"first" is a useless
- // exception it's wrapped in.
- first = cause;
- break;
- } else if (causeOfCause != null && isUselessCause(cause)
- && ExceptionHelper.resetCause(current, causeOfCause)) {
- // "cause" was a useless intermediate cause and was replace it
- // with its own cause.
- cause = causeOfCause;
- continue; // do loop once again with the new cause of "current"
- }
- current = cause;
- cause = current.getCause();
- }
-
- if (first instanceof RuntimeException) {
- throw (RuntimeException) first;
- } else if (first instanceof SemanticException) {
- throw (SemanticException) first;
- }
- throw new RuntimeException(first);
- }
-
- private static class ExceptionHelper {
- private static final Field CAUSE_FIELD = getField(Throwable.class, "cause"),
- TARGET_FIELD = getField(InvocationTargetException.class, "target"),
- MESSAGE_FIELD = getField(Throwable.class, "detailMessage");
-
- private static Field getField(Class<?> clazz, String name) {
- try {
- Field f = clazz.getDeclaredField(name);
- f.setAccessible(true);
- return f;
- } catch (Throwable t) {
- return null;
- }
- }
-
- public static boolean resetCause(Throwable target, Throwable newCause) {
- try {
- if (MESSAGE_FIELD == null)
- return false;
- Field field = (target instanceof InvocationTargetException) ? TARGET_FIELD : CAUSE_FIELD;
- if (field == null)
- return false;
-
- Throwable oldCause = target.getCause();
- String oldMsg = target.getMessage();
- field.set(target, newCause);
- if (oldMsg != null && oldMsg.equals(oldCause.toString())) {
- MESSAGE_FIELD.set(target, newCause == null ? null : newCause.toString());
- }
- } catch (Throwable se) {
- return false;
- }
- return true;
- }
- }
-
- private boolean isUselessCause(Throwable t) {
- return t instanceof RuntimeException || t instanceof InvocationTargetException
- || t instanceof UndeclaredThrowableException;
- }
-
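- // Editor's standalone sketch of the unwrapping idea above: walk the cause
- // chain, skipping wrapper types that carry no information of their own.
- // (Simplified; rethrowCalciteException also rewrites intermediate causes in
- // place via ExceptionHelper.resetCause.)
- private static Throwable unwrapUselessWrappers(Throwable t) {
- Throwable current = t;
- while (current.getCause() != null
- && (current instanceof RuntimeException
- || current instanceof InvocationTargetException
- || current instanceof UndeclaredThrowableException)) {
- current = current.getCause();
- }
- return current;
- }
-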
- private RowResolver genRowResolver(Operator op, QB qb) {
- RowResolver rr = new RowResolver();
- String subqAlias = (qb.getAliases().size() == 1 && qb.getSubqAliases().size() == 1) ? qb
- .getAliases().get(0) : null;
-
- for (ColumnInfo ci : op.getSchema().getSignature()) {
- try {
- rr.putWithCheck((subqAlias != null) ? subqAlias : ci.getTabAlias(),
- ci.getAlias() != null ? ci.getAlias() : ci.getInternalName(), ci.getInternalName(),
- new ColumnInfo(ci));
- } catch (SemanticException e) {
- throw new RuntimeException(e);
- }
- }
-
- return rr;
- }
-
- private enum ExtendedCBOProfile {
- JOIN_REORDERING,
- WINDOWING_POSTPROCESSING;
- }
-
- /**
- * Code responsible for Calcite plan generation and optimization.
- */
- private class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
- private RelOptCluster cluster;
- private RelOptSchema relOptSchema;
- private final Map<String, PrunedPartitionList> partitionCache;
- private final ColumnAccessInfo columnAccessInfo;
- private Map<HiveProject, Table> viewProjectToTableSchema;
-
- // correlated vars across subqueries within the same query need to have different IDs;
- // this will be used in RexNodeConverter to create correlation variables
- private int subqueryId;
-
- // this keeps track of whether a subquery is correlated and contains an aggregate,
- // since this is special-cased when it is rewritten in SubqueryRemoveRule
- Set<RelNode> corrScalarRexSQWithAgg = new HashSet<RelNode>();
-
- // TODO: Do we need to keep track of the RR and ColNameToPosMap for every op,
- // or just the last one?
- LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
- LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
-
- CalcitePlannerAction(Map<String, PrunedPartitionList> partitionCache, ColumnAccessInfo columnAccessInfo) {
- this.partitionCache = partitionCache;
- this.columnAccessInfo = columnAccessInfo;
- }
-
- @Override
- public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) {
- RelNode calciteGenPlan = null;
- RelNode calcitePreCboPlan = null;
- RelNode calciteOptimizedPlan = null;
- subqueryId = 0;
-
- /*
- * recreate cluster, so that it picks up the additional traitDef
- */
- final Double maxSplitSize = (double) HiveConf.getLongVar(
- conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
- final Double maxMemory = (double) HiveConf.getLongVar(
- conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
- HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
- HiveRulesRegistry registry = new HiveRulesRegistry();
- Properties calciteConfigProperties = new Properties();
- calciteConfigProperties.setProperty(
- CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(),
- Boolean.FALSE.toString());
- CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties);
- HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
- corrScalarRexSQWithAgg);
- RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
- final RexBuilder rexBuilder = cluster.getRexBuilder();
- final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder);
-
- this.cluster = optCluster;
- this.relOptSchema = relOptSchema;
-
- PerfLogger perfLogger = SessionState.getPerfLogger();
-
- // 1. Gen Calcite Plan
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- try {
- calciteGenPlan = genLogicalPlan(getQB(), true, null, null);
- // if we are creating a view, we do not use the table alias
- resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema(
- relToHiveRR.get(calciteGenPlan),
- getQB().isView() ? false : HiveConf.getBoolVar(conf,
- HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
- } catch (SemanticException e) {
- semanticException = e;
- throw new RuntimeException(e);
- }
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation");
-
- // Create executor
- RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster);
- calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider);
-
- // We need to get the ColumnAccessInfo and viewToTableSchema for views.
- HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.columnAccessInfo,
- this.viewProjectToTableSchema);
-
- fieldTrimmer.trim(calciteGenPlan);
-
- // Create and set MD provider
- HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
- RelMetadataQuery.THREAD_PROVIDERS.set(
- JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
-
- // Remove subquery
- LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
- calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
- HiveSubQueryRemoveRule.REL_NODE);
- LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
-
- calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
- LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
-
- // 2. Apply pre-join order optimizations
- calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan,
- mdProvider.getMetadataProvider(), executorProvider);
-
- // 3. Apply join order optimizations: reordering MST algorithm
- // If join optimization fails because of missing stats, we continue with
- // the rest of the optimizations
- if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- try {
- List<RelMetadataProvider> list = Lists.newArrayList();
- list.add(mdProvider.getMetadataProvider());
- RelTraitSet desiredTraits = optCluster
- .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY);
-
- HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
- hepPgmBldr.addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class));
- hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER));
-
- HepProgram hepPgm = hepPgmBldr.build();
- HepPlanner hepPlanner = new HepPlanner(hepPgm);
-
- hepPlanner.registerMetadataProviders(list);
- RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
- optCluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
-
- RelNode rootRel = calcitePreCboPlan;
- hepPlanner.setRoot(rootRel);
- if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) {
- rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits);
- }
- hepPlanner.setRoot(rootRel);
-
- calciteOptimizedPlan = hepPlanner.findBestExp();
- } catch (Exception e) {
- boolean isMissingStats = noColsMissingStats.get() > 0;
- if (isMissingStats) {
- LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO");
- noColsMissingStats.set(0);
- calciteOptimizedPlan = calcitePreCboPlan;
- disableSemJoinReordering = false;
- } else {
- throw e;
- }
- }
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering");
- } else {
- calciteOptimizedPlan = calcitePreCboPlan;
- disableSemJoinReordering = false;
- }
-
- // 4. Run other optimizations that do not need stats
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE,
- HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveAggregateProjectMergeRule.INSTANCE,
- HiveJoinCommuteRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats");
-
- // 5. Materialized view based rewriting
- // We disable it for CTAS and MV creation queries (trying to avoid any problem
- // due to data freshness)
- if (conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING) &&
- !getQB().isMaterializedView() && !getQB().isCTAS()) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- // Use Calcite cost model for view rewriting
- RelMetadataProvider calciteMdProvider = DefaultRelMetadataProvider.INSTANCE;
- RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(calciteMdProvider));
- planner.registerMetadataProviders(Lists.newArrayList(calciteMdProvider));
- // Add views to planner
- List<RelOptMaterialization> materializations = new ArrayList<>();
- try {
- materializations = Hive.get().getRewritingMaterializedViews();
- // We need to use the current cluster for the scan operator on views,
- // otherwise the planner will throw an Exception (different planners)
- materializations = Lists.transform(materializations,
- new Function<RelOptMaterialization, RelOptMaterialization>() {
- @Override
- public RelOptMaterialization apply(RelOptMaterialization materialization) {
- final RelNode viewScan = materialization.tableRel;
- final RelNode newViewScan;
- if (viewScan instanceof DruidQuery) {
- final DruidQuery dq = (DruidQuery) viewScan;
- newViewScan = DruidQuery.create(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
- viewScan.getTable(), dq.getDruidTable(),
- ImmutableList.<RelNode>of(dq.getTableScan()));
- } else {
- newViewScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
- (RelOptHiveTable) viewScan.getTable(), viewScan.getTable().getQualifiedName().get(0),
- null, false, false);
- }
- return new RelOptMaterialization(newViewScan, materialization.queryRel, null);
- }
- }
- );
- } catch (HiveException e) {
- LOG.warn("Exception loading materialized views", e);
- }
- if (!materializations.isEmpty()) {
- for (RelOptMaterialization materialization : materializations) {
- planner.addMaterialization(materialization);
- }
- // Add view-based rewriting rules to planner
- planner.addRule(HiveMaterializedViewFilterScanRule.INSTANCE);
- // Optimize plan
- planner.setRoot(calciteOptimizedPlan);
- calciteOptimizedPlan = planner.findBestExp();
- // Remove view-based rewriting rules from planner
- planner.clear();
- }
- // Restore default cost model
- RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting");
- }
-
- // 6. Run aggregate-join transpose (cost based)
- // If it fails because of missing stats, we continue with
- // the rest of the optimizations
- if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- try {
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveAggregateJoinTransposeRule.INSTANCE);
- } catch (Exception e) {
- boolean isMissingStats = noColsMissingStats.get() > 0;
- if (isMissingStats) {
- LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO");
- noColsMissingStats.set(0);
- } else {
- throw e;
- }
- }
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Aggregate join transpose");
- }
-
- // 7. Convert Join + GBy to semijoin
- // run this rule at later stages, since many Calcite rules can't deal with semijoins
- if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, HiveSemiJoinRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Semijoin conversion");
- }
-
-
- // 8. Run rule to fix windowing issue when it is done over
- // aggregation columns (HIVE-10627)
- if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule");
- }
-
- // 9. Apply Druid transformation rules
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, DruidRules.FILTER, DruidRules.AGGREGATE_PROJECT,
- DruidRules.PROJECT, DruidRules.AGGREGATE, DruidRules.SORT_PROJECT_TRANSPOSE,
- DruidRules.SORT, DruidRules.PROJECT_SORT_TRANSPOSE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Druid transformation rules");
-
- // 10. Run rules to aid in translation from Calcite tree to Hive tree
- if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- // 10.1. Merge join into multijoin operators (if possible)
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
- HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
- HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
- HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
- // The previous rules can pull up projections through join operators,
- // thus we run the field trimmer again to push them back down
- fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(optCluster, null));
- calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan);
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE,
- new ProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER));
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
- HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
- HiveProjectFilterPullUpConstantsRule.INSTANCE);
-
- // 10.2. Introduce exchange operators below join/multijoin operators
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN,
- HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Translation from Calcite tree to Hive tree");
- }
-
- if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) {
- LOG.debug("CBO Planning details:\n");
- LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan));
- LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n"
- + RelOptUtil.toString(calcitePreCboPlan));
- LOG.debug("Plan After Join Reordering:\n"
- + RelOptUtil.toString(calciteOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES));
- }
-
- return calciteOptimizedPlan;
- }
-
- /**
- * Perform all optimizations before Join Ordering.
- *
- * @param basePlan
- * original plan
- * @param mdProvider
- * metadata provider
- * @param executorProvider
- * executor
- * @return plan after the pre-join-ordering transformations
- */
- private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
- // TODO: Decorrelation of the subquery should be done before attempting
- // Partition Pruning; otherwise Expression evaluation may try to execute a
- // correlated subquery.
-
- PerfLogger perfLogger = SessionState.getPerfLogger();
-
- final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT);
- final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
-
- // 0. SetOp rewrite
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP,
- HiveProjectOverIntersectRemoveRule.INSTANCE, HiveIntersectMergeRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: HiveProjectOverIntersectRemoveRule and HiveIntersectMerge rules");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
- HiveIntersectRewriteRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: HiveIntersectRewrite rule");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
- HiveExceptRewriteRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: HiveExceptRewrite rule");
-
- // 1. Distinct aggregate rewrite
- // Run this optimization early, since it expands the operator pipeline.
- if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
- conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- // It's not clear if this rewrite is always performant on MR, since the extra
- // map phase introduced for the 2nd MR job may offset the gains of this
- // multi-stage aggregation. We need a cost model for MR to enable this on MR.
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite");
- }
-
- // 2. Try factoring out common filter elements & separating deterministic
- // vs non-deterministic UDFs. This needs to run before PPD so that PPD can
- // add on-clauses for old-style join syntax
- // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or
- // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.ARBITRARY,
- new HivePreFilteringRule(maxCNFNodeCount));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
-
- // 3. Run exhaustive PPD, add not null filters, transitive inference,
- // constant propagation, constant folding
- List<RelOptRule> rules = Lists.newArrayList();
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) {
- rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING);
- } else {
- rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC);
- }
- rules.add(HiveFilterSetOpTransposeRule.INSTANCE);
- rules.add(HiveFilterSortTransposeRule.INSTANCE);
- rules.add(HiveFilterJoinRule.JOIN);
- rules.add(HiveFilterJoinRule.FILTER_ON_JOIN);
- rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class));
- rules.add(new FilterMergeRule(HiveRelFactories.HIVE_BUILDER));
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_REDUCE_WITH_STATS)) {
- rules.add(HiveReduceExpressionsWithStatsRule.INSTANCE);
- }
- rules.add(HiveProjectFilterPullUpConstantsRule.INSTANCE);
- rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
- rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
- rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
- rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses));
- rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses));
- }
- rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
- rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
- rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
- rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
- rules.add(HiveSortMergeRule.INSTANCE);
- rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE);
- rules.add(HiveUnionPullUpConstantsRule.INSTANCE);
- rules.add(HiveAggregatePullUpConstantsRule.INSTANCE);
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
- rules.toArray(new RelOptRule[rules.size()]));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
-
- // 4. Push down limit through outer join
- // NOTE: We run this after PPD to support old style join syntax.
- // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
- // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE)) {
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- // This should be a cost-based decision, but until we enable the extended cost
- // model, we will use the given value for the variable
- final float reductionProportion = HiveConf.getFloatVar(conf,
- HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE);
- final long reductionTuples = HiveConf.getLongVar(conf,
- HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE,
- HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE,
- HiveSortUnionReduceRule.INSTANCE);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
- new HiveSortRemoveRule(reductionProportion, reductionTuples),
- HiveProjectSortTransposeRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Push down limit through outer join");
- }
-
- // 5. Push Down Semi Joins
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
- SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Push Down Semi Joins");
-
- // 6. Apply Partition Pruning
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf));
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Partition Pruning");
-
- // 7. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP)
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(cluster, null),
- profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING));
- basePlan = fieldTrimmer.trim(basePlan);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Projection Pruning");
-
- // 8. Merge, remove and reduce Project if possible
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, false, mdProvider, executorProvider,
- HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Merge Project-Project");
-
- // 9. Rerun PPD through Project as column pruning would have introduced
- // a DT above scans; by pushing the filter just above the TS, Hive can push
- // it into storage (in case there are filters on non-partition cols). This
- // only matches FIL-PROJ-TS
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
- basePlan = hepPlan(basePlan, true, mdProvider, executorProvider,
- HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
- HiveProjectFilterPullUpConstantsRule.INSTANCE);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Rerun PPD");
-
- return basePlan;
- }
-
- /**
- * Run the HEP Planner with the given rule set, using TOP_DOWN match order.
- *
- * @param basePlan plan to optimize
- * @param followPlanChanges whether rules are applied as one collection that
- * follows plan changes, or one by one as separate program phases
- * @param mdProvider metadata provider
- * @param executorProvider executor
- * @param rules rules to apply
- * @return optimized RelNode
- */
- private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptRule... rules) {
- return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider,
- HepMatchOrder.TOP_DOWN, rules);
- }
-
- /**
- * Run the HEP Planner with the given rule set and match order.
- *
- * @param basePlan plan to optimize
- * @param followPlanChanges whether rules are applied as one collection that
- * follows plan changes, or one by one as separate program phases
- * @param mdProvider metadata provider
- * @param executorProvider executor
- * @param order HEP match order
- * @param rules rules to apply
- * @return optimized RelNode
- */
- private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
- RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order,
- RelOptRule... rules) {
-
- RelNode optimizedRelNode = basePlan;
- HepProgramBuilder programBuilder = new HepProgramBuilder();
- if (followPlanChanges) {
- programBuilder.addMatchOrder(order);
- programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
- } else {
- // TODO: Should this also be TOP_DOWN?
- for (RelOptRule r : rules)
- programBuilder.addRuleInstance(r);
- }
-
- // Create planner and copy context
- HepPlanner planner = new HepPlanner(programBuilder.build(),
- basePlan.getCluster().getPlanner().getContext());
-
- List<RelMetadataProvider> list = Lists.newArrayList();
- list.add(mdProvider);
- planner.registerMetadataProviders(list);
- RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
- basePlan.getCluster().setMetadataProvider(
- new CachingRelMetadataProvider(chainedProvider, planner));
-
- if (executorProvider != null) {
- // basePlan.getCluster().getPlanner() is the VolcanoPlanner from apply();
- // both planners need to use the correct executor
- basePlan.getCluster().getPlanner().setExecutor(executorProvider);
- planner.setExecutor(executorProvider);
- }
-
- planner.setRoot(basePlan);
- optimizedRelNode = planner.findBestExp();
-
- return optimizedRelNode;
- }
-
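- // Usage sketch (matches the calls made throughout apply() above; illustrative):
- // plan = hepPlan(plan, false, mdProvider.getMetadataProvider(), null,
- // HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
-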
- @SuppressWarnings("nls")
- private RelNode genSetOpLogicalPlan(Opcode opcode, String alias, String leftalias, RelNode leftRel,
- String rightalias, RelNode rightRel) throws SemanticException {
- // 1. Get Row Resolvers, Column map for original left and right input of
- // SetOp Rel
- RowResolver leftRR = this.relToHiveRR.get(leftRel);
- RowResolver rightRR = this.relToHiveRR.get(rightRel);
- HashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
- HashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
-
- // 2. Validate that SetOp is feasible according to Hive (by using type
- // info from RR)
- if (leftmap.size() != rightmap.size()) {
- throw new SemanticException("Schema of both sides of union should match.");
- }
-
- ASTNode tabref = getQB().getAliases().isEmpty() ? null : getQB().getParseInfo()
- .getSrcForAlias(getQB().getAliases().get(0));
-
- // 3. construct SetOp Output RR using original left & right Input
- RowResolver setOpOutRR = new RowResolver();
-
- Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
- Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
- while (lIter.hasNext()) {
- Map.Entry<String, ColumnInfo> lEntry = lIter.next();
- Map.Entry<String, ColumnInfo> rEntry = rIter.next();
- ColumnInfo lInfo = lEntry.getValue();
- ColumnInfo rInfo = rEntry.getValue();
-
- String field = lEntry.getKey();
- // try widening conversion, otherwise fail union
- TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
- rInfo.getType());
- if (commonTypeInfo == null) {
- throw new SemanticException(generateErrorMessage(tabref,
- "Schema of both sides of setop should match: Column " + field
- + " is of type " + lInfo.getType().getTypeName()
- + " on first table and type " + rInfo.getType().getTypeName()
- + " on second table"));
- }
- ColumnInfo setOpColInfo = new ColumnInfo(lInfo);
- setOpColInfo.setType(commonTypeInfo);
- setOpOutRR.put(alias, field, setOpColInfo);
- }
-
- // 4. Determine which columns require a cast on the left/right input (Calcite
- // requires exact types on both sides of a SetOp)
- boolean leftNeedsTypeCast = false;
- boolean rightNeedsTypeCast = false;
- List<RexNode> leftProjs = new ArrayList<RexNode>();
- List<RexNode> rightProjs = new ArrayList<RexNode>();
- List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList();
- List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList();
-
- RelDataType leftFieldDT;
- RelDataType rightFieldDT;
- RelDataType unionFieldDT;
- for (int i = 0; i < leftRowDT.size(); i++) {
- leftFieldDT = leftRowDT.get(i).getType();
- rightFieldDT = rightRowDT.get(i).getType();
- if (!leftFieldDT.equals(rightFieldDT)) {
- unionFieldDT = TypeConverter.convert(setOpOutRR.getColumnInfos().get(i).getType(),
- cluster.getTypeFactory());
- if (!unionFieldDT.equals(leftFieldDT)) {
- leftNeedsTypeCast = true;
- }
- leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
- cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
-
- if (!unionFieldDT.equals(rightFieldDT)) {
- rightNeedsTypeCast = true;
- }
- rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
- cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
- } else {
- leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT,
- cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
- rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT,
- cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
- }
- }
-
- // 5. Introduce Project Rel above original left/right inputs if cast is
- // needed for type parity
- RelNode setOpLeftInput = leftRel;
- RelNode setOpRightInput = rightRel;
- if (leftNeedsTypeCast) {
- setOpLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType()
- .getFieldNames());
- }
- if (rightNeedsTypeCast) {
- setOpRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType()
- .getFieldNames());
- }
-
- // 6. Construct SetOp Rel
- Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
- bldr.add(setOpLeftInput);
- bldr.add(setOpRightInput);
- SetOp setOpRel = null;
- switch (opcode) {
- case UNION:
- setOpRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
- break;
- case INTERSECT:
- setOpRel = new HiveIntersect(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
- false)
<TRUNCATED>
[3/3] hive git commit: HIVE-16501 : Add rej/orig to .gitignore ; remove *.orig files (Zoltan Haindrich via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-16501 : Add rej/orig to .gitignore ; remove *.orig files (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c911f420
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c911f420
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c911f420
Branch: refs/heads/master
Commit: c911f42035d9e7b91191e2683f85d8fd2a35eb27
Parents: f1e0b4f
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Tue Apr 25 08:45:10 2017 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Apr 25 08:45:10 2017 -0700
----------------------------------------------------------------------
.gitignore | 2 +
.../hive/ql/parse/CalcitePlanner.java.orig | 4188 -----
.../hive/ql/parse/SemanticAnalyzer.java.orig | 13508 -----------------
3 files changed, 2 insertions(+), 17696 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c911f420/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 47c59da..8578a64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,5 @@ hcatalog/webhcat/svr/target
conf/hive-default.xml.template
itests/hive-blobstore/src/test/resources/blobstore-conf.xml
.DS_Store
+*.rej
+*.orig