Posted to commits@hive.apache.org by li...@apache.org on 2016/09/10 06:33:27 UTC
[1/2] hive git commit: HIVE-14728: Redundant orig files (Rui reviewed by Pengcheng)
Repository: hive
Updated Branches:
refs/heads/master 407cfe185 -> bc75e46ad
http://git-wip-us.apache.org/repos/asf/hive/blob/bc75e46a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
deleted file mode 100644
index 699bb11..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
+++ /dev/null
@@ -1,13038 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.parse;
-
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.Serializable;
-import java.security.AccessControlException;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Queue;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.UUID;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-
-import org.antlr.runtime.ClassicToken;
-import org.antlr.runtime.CommonToken;
-import org.antlr.runtime.Token;
-import org.antlr.runtime.tree.Tree;
-import org.antlr.runtime.tree.TreeWizard;
-import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.hive.common.BlobStorageUtils;
-import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.ObjectPair;
-import org.apache.hadoop.hive.common.StatsSetupConst;
-import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.TableType;
-import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Order;
-import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
-import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
-import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.QueryProperties;
-import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.RecordReader;
-import org.apache.hadoop.hive.ql.exec.RecordWriter;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.hooks.ReadEntity;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
-import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
-import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
-import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
-import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.metadata.DummyPartition;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
-import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
-import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.Optimizer;
-import org.apache.hadoop.hive.ql.optimizer.Transform;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
-import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
-import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
-import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType;
-import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
-import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
-import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
-import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
-import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
-import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
-import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
-import org.apache.hadoop.hive.ql.plan.DDLWork;
-import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
-import org.apache.hadoop.hive.ql.plan.ForwardDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.HiveOperation;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
-import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
-import org.apache.hadoop.hive.ql.plan.LimitDesc;
-import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
-import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
-import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PTFDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.plan.ScriptDesc;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.plan.UnionDesc;
-import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
-import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
-import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
-import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.ql.util.ResourceDownloader;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
-import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
-import org.apache.hadoop.hive.serde2.NullStructSerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.shims.HadoopShims;
-import org.apache.hadoop.hive.shims.Utils;
-import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.security.UserGroupInformation;
-
-import com.google.common.collect.Sets;
-
-/**
- * Implementation of the semantic analyzer. It generates the query plan.
- * There are other, more specific semantic analyzers for some Hive operations, such as
- * DDLSemanticAnalyzer for DDL operations.
- */
-
-public class SemanticAnalyzer extends BaseSemanticAnalyzer {
-
- public static final String DUMMY_DATABASE = "_dummy_database";
- public static final String DUMMY_TABLE = "_dummy_table";
- public static final String SUBQUERY_TAG_1 = "-subquery1";
- public static final String SUBQUERY_TAG_2 = "-subquery2";
-
- // Max characters when auto generating the column name with func name
- private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
-
- private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
-
- static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
-
- private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
- private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
- protected HashMap<String, TableScanOperator> topOps;
- protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
- private List<LoadTableDesc> loadTableWork;
- private List<LoadFileDesc> loadFileWork;
- private List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts;
- private final Map<JoinOperator, QBJoinTree> joinContext;
- private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
- private final HashMap<TableScanOperator, Table> topToTable;
- private final Map<FileSinkOperator, Table> fsopToTable;
- private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
- private final HashMap<TableScanOperator, Map<String, String>> topToTableProps;
- private QB qb;
- private ASTNode ast;
- private int destTableId;
- private UnionProcContext uCtx;
- List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
- private HashMap<TableScanOperator, SampleDesc> opToSamplePruner;
- private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
- private Map<SelectOperator, Table> viewProjectToTableSchema;
- /**
- * a map for the split sampling, from alias to an instance of SplitSample
- * that describes percentage and number.
- */
- private final HashMap<String, SplitSample> nameToSplitSample;
- Map<GroupByOperator, Set<String>> groupOpToInputTables;
- Map<String, PrunedPartitionList> prunedPartitions;
- protected List<FieldSchema> resultSchema;
- private CreateViewDesc createVwDesc;
- private ArrayList<String> viewsExpanded;
- private ASTNode viewSelect;
- protected final UnparseTranslator unparseTranslator;
- private final GlobalLimitCtx globalLimitCtx;
-
- // prefix for column names auto generated by hive
- private final String autogenColAliasPrfxLbl;
- private final boolean autogenColAliasPrfxIncludeFuncName;
-
- // Keep track of view alias to read entity corresponding to the view
- // For example, for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T,
- // this keeps track of aliases for V3, V3:V2, and V3:V2:V1.
- // It is used when T is added as an input for the query; the parents of T are
- // derived from the alias V3:V2:V1:T.
- private final Map<String, ReadEntity> viewAliasToInput;
-
- // Need to merge the isDirect flag into the input even if the newInput does not have a parent.
- private boolean mergeIsDirect;
-
- // flag for no scan during analyze ... compute statistics
- protected boolean noscan;
-
- // flag for partial scan during analyze ... compute statistics
- protected boolean partialscan;
-
- protected volatile boolean disableJoinMerge = false;
-
- /*
- * Capture the CTE definitions in a Query.
- */
- final Map<String, CTEClause> aliasToCTEs;
-
- /*
- * Used to check recursive CTE invocations. Similar to viewsExpanded
- */
- ArrayList<String> ctesExpanded;
-
- /*
- * Whether root tasks after materialized CTE linkage have been resolved
- */
- boolean rootTasksResolved;
-
- protected TableMask tableMask;
-
- CreateTableDesc tableDesc;
-
- /** Not thread-safe. */
- final ASTSearcher astSearcher = new ASTSearcher();
-
- protected AnalyzeRewriteContext analyzeRewrite;
-
- // A mapping from a tableName to a table object in metastore.
- Map<String, Table> tabNameToTabObject;
-
- // The tokens we should ignore when we are trying to do table masking.
- private final Set<Integer> ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
- HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
- HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
-
- static class Phase1Ctx {
- String dest;
- int nextNum;
- }
-
- public SemanticAnalyzer(QueryState queryState) throws SemanticException {
- super(queryState);
- opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
- opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
- opToSamplePruner = new HashMap<TableScanOperator, SampleDesc>();
- nameToSplitSample = new HashMap<String, SplitSample>();
- // Must be deterministic order maps - see HIVE-8707
- topOps = new LinkedHashMap<String, TableScanOperator>();
- loadTableWork = new ArrayList<LoadTableDesc>();
- loadFileWork = new ArrayList<LoadFileDesc>();
- columnStatsAutoGatherContexts = new ArrayList<ColumnStatsAutoGatherContext>();
- opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
- joinContext = new HashMap<JoinOperator, QBJoinTree>();
- smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>();
- // Must be deterministic order map for consistent q-test output across Java versions
- topToTable = new LinkedHashMap<TableScanOperator, Table>();
- fsopToTable = new HashMap<FileSinkOperator, Table>();
- reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>();
- topToTableProps = new HashMap<TableScanOperator, Map<String, String>>();
- destTableId = 1;
- uCtx = null;
- listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
- groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
- prunedPartitions = new HashMap<String, PrunedPartitionList>();
- tabNameToTabObject = new HashMap<String, Table>();
- unparseTranslator = new UnparseTranslator(conf);
- autogenColAliasPrfxLbl = HiveConf.getVar(conf,
- HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
- autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
- HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
- queryProperties = new QueryProperties();
- opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
- aliasToCTEs = new HashMap<String, CTEClause>();
- globalLimitCtx = new GlobalLimitCtx();
- viewAliasToInput = new HashMap<String, ReadEntity>();
- mergeIsDirect = true;
- noscan = partialscan = false;
- }
-
- @Override
- protected void reset(boolean clearPartsCache) {
- super.reset(true);
- if(clearPartsCache) {
- prunedPartitions.clear();
-
- //When init(true) is combined with genResolvedParseTree, it will generate a resolved parse tree from the syntax tree.
- //ReadEntity objects created under these conditions should all be relevant to the syntax tree, even the ones without parents,
- //so set mergeIsDirect to true here.
- mergeIsDirect = true;
- } else {
- mergeIsDirect = false;
- }
- tabNameToTabObject.clear();
- loadTableWork.clear();
- loadFileWork.clear();
- columnStatsAutoGatherContexts.clear();
- topOps.clear();
- destTableId = 1;
- idToTableNameMap.clear();
- qb = null;
- ast = null;
- uCtx = null;
- joinContext.clear();
- smbMapJoinContext.clear();
- opParseCtx.clear();
- groupOpToInputTables.clear();
- disableJoinMerge = false;
- aliasToCTEs.clear();
- topToTable.clear();
- opToPartPruner.clear();
- opToPartList.clear();
- opToPartToSkewedPruner.clear();
- opToSamplePruner.clear();
- nameToSplitSample.clear();
- fsopToTable.clear();
- resultSchema = null;
- createVwDesc = null;
- viewsExpanded = null;
- viewSelect = null;
- ctesExpanded = null;
- globalLimitCtx.disableOpt();
- viewAliasToInput.clear();
- reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
- topToTableProps.clear();
- listMapJoinOpsNoReducer.clear();
- unparseTranslator.clear();
- queryProperties.clear();
- outputs.clear();
- }
-
- public void initParseCtx(ParseContext pctx) {
- opToPartPruner = pctx.getOpToPartPruner();
- opToPartList = pctx.getOpToPartList();
- opToSamplePruner = pctx.getOpToSamplePruner();
- topOps = pctx.getTopOps();
- loadTableWork = pctx.getLoadTableWork();
- loadFileWork = pctx.getLoadFileWork();
- ctx = pctx.getContext();
- destTableId = pctx.getDestTableId();
- idToTableNameMap = pctx.getIdToTableNameMap();
- uCtx = pctx.getUCtx();
- listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
- prunedPartitions = pctx.getPrunedPartitions();
- tabNameToTabObject = pctx.getTabNameToTabObject();
- fetchTask = pctx.getFetchTask();
- setLineageInfo(pctx.getLineageInfo());
- }
-
- public ParseContext getParseContext() {
- // Make sure the basic query properties are initialized
- copyInfoToQueryProperties(queryProperties);
- return new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
- new HashSet<JoinOperator>(joinContext.keySet()),
- new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
- loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx,
- listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
- opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
- opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
- analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
- }
-
- public CompilationOpContext getOpContext() {
- return ctx.getOpContext();
- }
-
- public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
- throws SemanticException {
- doPhase1QBExpr(ast, qbexpr, id, alias, false);
- }
- @SuppressWarnings("nls")
- public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView)
- throws SemanticException {
-
- assert (ast.getToken() != null);
- switch (ast.getToken().getType()) {
- case HiveParser.TOK_QUERY: {
- QB qb = new QB(id, alias, true);
- qb.setInsideView(insideView);
- Phase1Ctx ctx_1 = initPhase1Ctx();
- doPhase1(ast, qb, ctx_1, null);
-
- qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
- qbexpr.setQB(qb);
- }
- break;
- case HiveParser.TOK_UNIONALL: {
- qbexpr.setOpcode(QBExpr.Opcode.UNION);
- // query 1
- assert (ast.getChild(0) != null);
- QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
- doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1,
- alias + SUBQUERY_TAG_1, insideView);
- qbexpr.setQBExpr1(qbexpr1);
-
- // query 2
- assert (ast.getChild(1) != null);
- QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
- doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2,
- alias + SUBQUERY_TAG_2, insideView);
- qbexpr.setQBExpr2(qbexpr2);
- }
- break;
- }
- }
-
- private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect(
- ASTNode selExpr, QB qb, String dest) throws SemanticException {
-
- // Iterate over the select expressions searching for aggregation trees.
- // Use Strings as keys to eliminate duplicate trees.
- LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>();
- List<ASTNode> wdwFns = new ArrayList<ASTNode>();
- for (int i = 0; i < selExpr.getChildCount(); ++i) {
- ASTNode function = (ASTNode) selExpr.getChild(i);
- if (function.getType() == HiveParser.TOK_SELEXPR ||
- function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
- function = (ASTNode)function.getChild(0);
- }
- doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
- }
-
- // window based aggregations are handled differently
- for (ASTNode wdwFn : wdwFns) {
- WindowingSpec spec = qb.getWindowingSpec(dest);
- if(spec == null) {
- queryProperties.setHasWindowing(true);
- spec = new WindowingSpec();
- qb.addDestToWindowingSpec(dest, spec);
- }
- HashMap<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
- int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
- WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
- (ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
- // If this is a duplicate invocation of a function, don't add it to the WindowingSpec.
- if ( wExprsInDest != null &&
- wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
- continue;
- }
- wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
- spec.addWindowFunction(wFnSpec);
- qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
- }
-
- return aggregationTrees;
- }
-
- private void doPhase1GetColumnAliasesFromSelect(
- ASTNode selectExpr, QBParseInfo qbp) {
- for (int i = 0; i < selectExpr.getChildCount(); ++i) {
- ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
- if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
- && (selExpr.getChildCount() == 2)) {
- String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
- qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
- }
- }
- }
-
- /**
- * DFS-scan the expressionTree to find all aggregation subtrees and put them
- * in aggregations.
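- *
- * For example (illustrative), for "select count(1), max(c), count(1) from t", the map keys
- * are the toStringTree() forms of the subtrees, so the duplicate count(1) is stored only once.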
- *
- * @param expressionTree
- * @param aggregations
- * the key to the HashTable is the toStringTree() representation of
- * the aggregation subtree.
- * @throws SemanticException
- */
- private void doPhase1GetAllAggregations(ASTNode expressionTree,
- HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
- int exprTokenType = expressionTree.getToken().getType();
- if (exprTokenType == HiveParser.TOK_FUNCTION
- || exprTokenType == HiveParser.TOK_FUNCTIONDI
- || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
- assert (expressionTree.getChildCount() != 0);
- if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
- == HiveParser.TOK_WINDOWSPEC) {
- // If it is a windowing spec, we include it in the list
- // Further, we will examine its children AST nodes to check whether
- // there are aggregation functions within
- wdwFns.add(expressionTree);
- doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
- aggregations, wdwFns);
- return;
- }
- if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
- String functionName = unescapeIdentifier(expressionTree.getChild(0)
- .getText());
- // Validate the function name
- if (FunctionRegistry.getFunctionInfo(functionName) == null) {
- throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
- }
- if(FunctionRegistry.impliesOrder(functionName)) {
- throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
- }
- if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
- if(containsLeadLagUDF(expressionTree)) {
- throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
- }
- aggregations.put(expressionTree.toStringTree(), expressionTree);
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
- if (!fi.isNative()) {
- unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
- .getChild(0));
- }
- return;
- }
- }
- }
- for (int i = 0; i < expressionTree.getChildCount(); i++) {
- doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
- aggregations, wdwFns);
- }
- }
-
- private List<ASTNode> doPhase1GetDistinctFuncExprs(
- HashMap<String, ASTNode> aggregationTrees) throws SemanticException {
- List<ASTNode> exprs = new ArrayList<ASTNode>();
- for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
- ASTNode value = entry.getValue();
- assert (value != null);
- if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
- exprs.add(value);
- }
- }
- return exprs;
- }
-
- public static String generateErrorMessage(ASTNode ast, String message) {
- StringBuilder sb = new StringBuilder();
- if (ast == null) {
- sb.append(message).append(". Cannot tell the position of null AST.");
- return sb.toString();
- }
- sb.append(ast.getLine());
- sb.append(":");
- sb.append(ast.getCharPositionInLine());
- sb.append(" ");
- sb.append(message);
- sb.append(". Error encountered near token '");
- sb.append(ErrorMsg.getText(ast));
- sb.append("'");
- return sb.toString();
- }
-
- ASTNode getAST() {
- return this.ast;
- }
-
- protected void setAST(ASTNode newAST) {
- this.ast = newAST;
- }
-
- /**
- * Goes through the tabref tree and finds the alias for the table. Once found,
- * it records the table name -> alias association in aliasToTabs. It also makes
- * an association from the alias to the table AST in parse info.
- *
- * @return the alias of the table
- */
- private String processTable(QB qb, ASTNode tabref) throws SemanticException {
- // For each table reference get the table name
- // and the alias (if alias is not present, the table name
- // is used as an alias)
- int aliasIndex = 0;
- int propsIndex = -1;
- int tsampleIndex = -1;
- int ssampleIndex = -1;
- for (int index = 1; index < tabref.getChildCount(); index++) {
- ASTNode ct = (ASTNode) tabref.getChild(index);
- if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
- tsampleIndex = index;
- } else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
- ssampleIndex = index;
- } else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
- propsIndex = index;
- } else {
- aliasIndex = index;
- }
- }
-
- ASTNode tableTree = (ASTNode) (tabref.getChild(0));
-
- String tabIdName = getUnescapedName(tableTree).toLowerCase();
-
- String alias;
- if (aliasIndex != 0) {
- alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
- }
- else {
- alias = getUnescapedUnqualifiedTableName(tableTree);
- }
-
- if (propsIndex >= 0) {
- Tree propsAST = tabref.getChild(propsIndex);
- Map<String, String> props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
- // We get the information from Calcite.
- if ("TRUE".equals(props.get("insideView"))) {
- qb.getAliasInsideView().add(alias.toLowerCase());
- }
- qb.setTabProps(alias, props);
- }
-
- // If the alias is already there then we have a conflict
- if (qb.exists(alias)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
- .getChild(aliasIndex)));
- }
- if (tsampleIndex >= 0) {
- ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
- ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
- if (sampleClause.getChildCount() > 2) {
- for (int i = 2; i < sampleClause.getChildCount(); i++) {
- sampleCols.add((ASTNode) sampleClause.getChild(i));
- }
- }
- // TODO: For now only support sampling on up to two columns
- // Need to change it to list of columns
- if (sampleCols.size() > 2) {
- throw new SemanticException(generateErrorMessage(
- (ASTNode) tabref.getChild(0),
- ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
- }
- TableSample tabSample = new TableSample(
- unescapeIdentifier(sampleClause.getChild(0).getText()),
- unescapeIdentifier(sampleClause.getChild(1).getText()),
- sampleCols);
- qb.getParseInfo().setTabSample(alias, tabSample);
- if (unparseTranslator.isEnabled()) {
- for (ASTNode sampleCol : sampleCols) {
- unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
- .getChild(0));
- }
- }
- } else if (ssampleIndex >= 0) {
- ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
-
- Tree type = sampleClause.getChild(0);
- Tree numerator = sampleClause.getChild(1);
- String value = unescapeIdentifier(numerator.getText());
-
-
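- // Illustrative examples of the split-sample syntax handled below: TABLESAMPLE(0.1 PERCENT)
- // -> TOK_PERCENT, TABLESAMPLE(10 ROWS) -> TOK_ROWCOUNT, and TABLESAMPLE(100M) -> TOK_LENGTH
- // with a k/m/g size suffix.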
- SplitSample sample;
- if (type.getType() == HiveParser.TOK_PERCENT) {
- assertCombineInputFormat(numerator, "Percentage");
- Double percent = Double.valueOf(value).doubleValue();
- if (percent < 0 || percent > 100) {
- throw new SemanticException(generateErrorMessage((ASTNode) numerator,
- "Sampling percentage should be between 0 and 100"));
- }
- int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
- sample = new SplitSample(percent, seedNum);
- } else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
- sample = new SplitSample(Integer.parseInt(value));
- } else {
- assert type.getType() == HiveParser.TOK_LENGTH;
- assertCombineInputFormat(numerator, "Total Length");
- long length = Integer.parseInt(value.substring(0, value.length() - 1));
- char last = value.charAt(value.length() - 1);
- if (last == 'k' || last == 'K') {
- length <<= 10;
- } else if (last == 'm' || last == 'M') {
- length <<= 20;
- } else if (last == 'g' || last == 'G') {
- length <<= 30;
- }
- int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
- sample = new SplitSample(length, seedNum);
- }
- String alias_id = getAliasId(alias, qb);
- nameToSplitSample.put(alias_id, sample);
- }
- // Insert this map into the stats
- qb.setTabAlias(alias, tabIdName);
- if (qb.isInsideView()) {
- qb.getAliasInsideView().add(alias.toLowerCase());
- }
- qb.addAlias(alias);
-
- qb.getParseInfo().setSrcForAlias(alias, tableTree);
-
- // If aliasToCTEs contains the alias, we do not do the translation because
- // the CTE is actually a subquery.
- if (!this.aliasToCTEs.containsKey(alias)) {
- unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
- if (aliasIndex != 0) {
- unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
- }
- }
-
- return alias;
- }
-
- Map<String, SplitSample> getNameToSplitSampleMap() {
- return this.nameToSplitSample;
- }
-
- /**
- * Convert a string to Text format and write its bytes in the same way TextOutputFormat would do.
- * This is needed to properly encode non-ascii characters.
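- * For example, a non-ASCII string such as "café" is written as its UTF-8 bytes via Text,
- * rather than through the platform default encoding.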
- */
- private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
- Text to = new Text(text);
- out.write(to.getBytes(), 0, to.getLength());
- }
-
- /**
- * Generate a temp table out of a value clause
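- * For example (illustrative), "INSERT INTO T VALUES (1, 'a')" is rewritten to read from a
- * temporary TextFile table named Values__Tmp__Table__N whose data_file holds the rows.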
- * See also {@link #preProcessForInsert(ASTNode, QB)}
- */
- private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
- Path dataDir = null;
- if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
- //Currently only INSERT INTO T VALUES(...) is supported, thus only 1 values clause
- //and only 1 target table are possible. If/when support for
- //SELECT ... FROM VALUES(...) is added, an insert statement may have multiple
- //encrypted target tables.
- dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
- }
- // Pick a name for the table
- SessionState ss = SessionState.get();
- String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
-
- // Step 1, parse the values clause we were handed
- List<? extends Node> fromChildren = originalFrom.getChildren();
- // First child should be the virtual table ref
- ASTNode virtualTableRef = (ASTNode)fromChildren.get(0);
- assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF :
- "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " +
- virtualTableRef.getName();
-
- List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
- // First child of this should be the table name. If it's anonymous,
- // then we don't have a table name.
- ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0);
- if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
- // TODO, if you want to make select ... from (values(...)) as foo(...) work,
- // you need to parse this list of column names and build it into the table
- throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
- }
-
- // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
- ASTNode valuesTable = (ASTNode)fromChildren.get(1);
- assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE :
- "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " +
- valuesTable.getName();
- // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
- List<? extends Node> valuesTableChildren = valuesTable.getChildren();
-
- // Now that we're going to start reading through the rows, open a file to write the rows to.
- // If we leave this method before creating the temporary table, we need to be sure to clean up
- // this file.
- Path tablePath = null;
- FileSystem fs = null;
- FSDataOutputStream out = null;
- try {
- if(dataDir == null) {
- tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
- }
- else {
- //if target table of insert is encrypted, make sure temporary table data is stored
- //similarly encrypted
- tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
- }
- fs = tablePath.getFileSystem(conf);
- fs.mkdirs(tablePath);
- Path dataFile = new Path(tablePath, "data_file");
- out = fs.create(dataFile);
- List<FieldSchema> fields = new ArrayList<FieldSchema>();
-
- boolean firstRow = true;
- for (Node n : valuesTableChildren) {
- ASTNode valuesRow = (ASTNode) n;
- assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW :
- "Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
- // Each of the children of this should be a literal
- List<? extends Node> valuesRowChildren = valuesRow.getChildren();
- boolean isFirst = true;
- int nextColNum = 1;
- for (Node n1 : valuesRowChildren) {
- ASTNode value = (ASTNode) n1;
- if (firstRow) {
- fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
- }
- if (isFirst) isFirst = false;
- else writeAsText("\u0001", out);
- writeAsText(unparseExprForValuesClause(value), out);
- }
- writeAsText("\n", out);
- firstRow = false;
- }
-
- // Step 2, create a temp table, using the created file as the data
- StorageFormat format = new StorageFormat(conf);
- format.processStorageFormat("TextFile");
- Table table = db.newTable(tableName);
- table.setSerializationLib(format.getSerde());
- table.setFields(fields);
- table.setDataLocation(tablePath);
- table.getTTable().setTemporary(true);
- table.setStoredAsSubDirectories(false);
- table.setInputFormatClass(format.getInputFormat());
- table.setOutputFormatClass(format.getOutputFormat());
- db.createTable(table, false);
- } catch (Exception e) {
- String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
- LOG.error(errMsg);
- // Try to delete the file
- if (fs != null && tablePath != null) {
- try {
- fs.delete(tablePath, false);
- } catch (IOException swallowIt) {}
- }
- throw new SemanticException(errMsg, e);
- } finally {
- IOUtils.closeStream(out);
- }
-
- // Step 3, return a new subtree with a from clause built around that temp table
- // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
- Token t = new ClassicToken(HiveParser.TOK_TABREF);
- ASTNode tabRef = new ASTNode(t);
- t = new ClassicToken(HiveParser.TOK_TABNAME);
- ASTNode tabNameNode = new ASTNode(t);
- tabRef.addChild(tabNameNode);
- t = new ClassicToken(HiveParser.Identifier, tableName);
- ASTNode identifier = new ASTNode(t);
- tabNameNode.addChild(identifier);
- return tabRef;
- }
-
- // Take an expression in the values clause and turn it back into a string. This is far from
- // comprehensive. At the moment it only supports:
- // * literals (all types)
- // * unary negatives
- // * true/false
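- //
- // For example (illustrative): MINUS over a Number child "1" yields "-1", KW_FALSE yields the
- // empty string (since UDFToBoolean treats any non-empty string as true), and TOK_NULL yields "\N".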
- private String unparseExprForValuesClause(ASTNode expr) throws SemanticException {
- switch (expr.getToken().getType()) {
- case HiveParser.Number:
- return expr.getText();
-
- case HiveParser.StringLiteral:
- return BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
-
- case HiveParser.KW_FALSE:
- // UDFToBoolean casts any non-empty string to true, so return an empty string for false
- return "";
-
- case HiveParser.KW_TRUE:
- return "TRUE";
-
- case HiveParser.MINUS:
- return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
-
- case HiveParser.TOK_NULL:
- // Hive's text input will translate this as a null
- return "\\N";
-
- default:
- throw new SemanticException("Expression of type " + expr.getText() +
- " not supported in insert/values");
- }
-
- }
-
- private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
- String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
- HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
- HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
- if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
- throw new SemanticException(generateErrorMessage((ASTNode) numerator,
- message + " sampling is not supported in " + inputFormat));
- }
- }
-
- private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
-
- // This is a subquery and must have an alias
- if (subq.getChildCount() != 2) {
- throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
- }
- ASTNode subqref = (ASTNode) subq.getChild(0);
- String alias = unescapeIdentifier(subq.getChild(1).getText());
-
- // Recursively do the first phase of semantic analysis for the subquery
- QBExpr qbexpr = new QBExpr(alias);
-
- doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias);
-
- // If the alias is already there then we have a conflict
- if (qb.exists(alias)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
- .getChild(1)));
- }
- // Insert this map into the stats
- qb.setSubqAlias(alias, qbexpr);
- qb.addAlias(alias);
-
- unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
-
- return alias;
- }
-
- /*
- * Phase1: hold onto any CTE definitions in aliasToCTE.
- * CTE definitions are global to the Query.
- */
- private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
-
- int numCTEs = ctes.getChildCount();
-
- for(int i=0; i <numCTEs; i++) {
- ASTNode cte = (ASTNode) ctes.getChild(i);
- ASTNode cteQry = (ASTNode) cte.getChild(0);
- String alias = unescapeIdentifier(cte.getChild(1).getText());
-
- String qName = qb.getId() == null ? "" : qb.getId() + ":";
- qName += alias.toLowerCase();
-
- if ( aliasToCTEs.containsKey(qName)) {
- throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1)));
- }
- aliasToCTEs.put(qName, new CTEClause(qName, cteQry));
- }
- }
-
- /*
- * We allow CTE definitions in views. So we can end up with a hierarchy of CTE definitions:
- * - at the top level of a query statement
- * - where a view is referenced.
- * - views may refer to other views.
- *
- * The scoping rule we use is to search for a CTE from the current QB outwards. In order to
- * disambiguate between CTEs at different levels, we qualify (prefix) them with the id of the QB
- * they appear in when adding them to the <code>aliasToCTEs</code> map.
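- * For example (illustrative), with QB id "q1:q2" and CTE name "c", the lookup below tries
- * "q1:q2:c", then "q1:c", and finally the unqualified "c".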
- *
- */
- private CTEClause findCTEFromName(QB qb, String cteName) {
- StringBuilder qId = new StringBuilder();
- if (qb.getId() != null) {
- qId.append(qb.getId());
- }
-
- while (qId.length() > 0) {
- String nm = qId + ":" + cteName;
- CTEClause cte = aliasToCTEs.get(nm);
- if (cte != null) {
- return cte;
- }
- int lastIndex = qId.lastIndexOf(":");
- lastIndex = lastIndex < 0 ? 0 : lastIndex;
- qId.setLength(lastIndex);
- }
- return aliasToCTEs.get(cteName);
- }
-
- /*
- * If a CTE is referenced in a QueryBlock:
- * - add it as a SubQuery for now.
- * - SQ.alias is the alias used in the QB (if no alias is specified,
- * it uses the CTE name; this works just like table references).
- * - Adding SQ done by:
- * - copying AST of CTE
- * - setting ASTOrigin on cloned AST.
- * - trigger phase 1 on new QBExpr.
- * - update QB data structs: remove this as a table reference, move it to a SQ invocation.
- */
- private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
- throws SemanticException {
- cteAlias = cteAlias == null ? cteName : cteAlias;
- CTEClause cte = findCTEFromName(qb, cteName);
- ASTNode cteQryNode = cte.cteNode;
- QBExpr cteQBExpr = new QBExpr(cteAlias);
- doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
- qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
- }
-
- private final CTEClause rootClause = new CTEClause(null, null);
-
- @Override
- public List<Task<? extends Serializable>> getAllRootTasks() {
- if (!rootTasksResolved) {
- rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
- rootTasksResolved = true;
- }
- return rootTasks;
- }
-
- @Override
- public HashSet<ReadEntity> getAllInputs() {
- HashSet<ReadEntity> readEntities = new HashSet<ReadEntity>(getInputs());
- for (CTEClause cte : rootClause.asExecutionOrder()) {
- if (cte.source != null) {
- readEntities.addAll(cte.source.getInputs());
- }
- }
- return readEntities;
- }
-
- @Override
- public HashSet<WriteEntity> getAllOutputs() {
- HashSet<WriteEntity> writeEntities = new HashSet<WriteEntity>(getOutputs());
- for (CTEClause cte : rootClause.asExecutionOrder()) {
- if (cte.source != null) {
- writeEntities.addAll(cte.source.getOutputs());
- }
- }
- return writeEntities;
- }
-
- class CTEClause {
- CTEClause(String alias, ASTNode cteNode) {
- this.alias = alias;
- this.cteNode = cteNode;
- }
- String alias;
- ASTNode cteNode;
- boolean materialize;
- int reference;
- QBExpr qbExpr;
- List<CTEClause> parents = new ArrayList<CTEClause>();
-
- // materialized
- Table table;
- SemanticAnalyzer source;
-
- List<Task<? extends Serializable>> getTasks() {
- return source == null ? null : source.rootTasks;
- }
-
- List<CTEClause> asExecutionOrder() {
- List<CTEClause> execution = new ArrayList<CTEClause>();
- asExecutionOrder(new HashSet<CTEClause>(), execution);
- return execution;
- }
-
- void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) {
- for (CTEClause parent : parents) {
- if (visited.add(parent)) {
- parent.asExecutionOrder(visited, execution);
- }
- }
- execution.add(this);
- }
-
- @Override
- public String toString() {
- return alias == null ? "<root>" : alias;
- }
- }
-
- private List<Task<? extends Serializable>> toRealRootTasks(List<CTEClause> execution) {
- List<Task<? extends Serializable>> cteRoots = new ArrayList<>();
- List<Task<? extends Serializable>> cteLeafs = new ArrayList<>();
- List<Task<? extends Serializable>> curTopRoots = null;
- List<Task<? extends Serializable>> curBottomLeafs = null;
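- // Walk the CTEs in execution order, making each group's tasks depend on the previous
- // group's leaf tasks; finally the main query's root tasks are chained after the collected
- // CTE leafs, and the CTE roots become the real root tasks.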
- for (int i = 0; i < execution.size(); i++) {
- CTEClause current = execution.get(i);
- if (current.parents.isEmpty() && curTopRoots != null) {
- cteRoots.addAll(curTopRoots);
- cteLeafs.addAll(curBottomLeafs);
- curTopRoots = curBottomLeafs = null;
- }
- List<Task<? extends Serializable>> curTasks = current.getTasks();
- if (curTasks == null) {
- continue;
- }
- if (curTopRoots == null) {
- curTopRoots = curTasks;
- }
- if (curBottomLeafs != null) {
- for (Task<?> topLeafTask : curBottomLeafs) {
- for (Task<?> currentRootTask : curTasks) {
- topLeafTask.addDependentTask(currentRootTask);
- }
- }
- }
- curBottomLeafs = Task.findLeafs(curTasks);
- }
- if (curTopRoots != null) {
- cteRoots.addAll(curTopRoots);
- cteLeafs.addAll(curBottomLeafs);
- }
-
- if (cteRoots.isEmpty()) {
- return rootTasks;
- }
- for (Task<?> cteLeafTask : cteLeafs) {
- for (Task<?> mainRootTask : rootTasks) {
- cteLeafTask.addDependentTask(mainRootTask);
- }
- }
- return cteRoots;
- }
-
- Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
-
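- // Builds (illustratively) the AST for "CREATE TEMPORARY TABLE <cteName> AS <cte query>",
- // marked with MATERIALIZATION_MARKER, and analyzes it with a fresh SemanticAnalyzer.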
- ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
-
- ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
- tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
-
- ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
-
- createTable.addChild(tableName);
- createTable.addChild(temporary);
- createTable.addChild(cte.cteNode);
-
- SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
- analyzer.initCtx(ctx);
- analyzer.init(false);
-
- // should share cte contexts
- analyzer.aliasToCTEs.putAll(aliasToCTEs);
-
- HiveOperation operation = queryState.getHiveOperation();
- try {
- analyzer.analyzeInternal(createTable);
- } finally {
- queryState.setCommandType(operation);
- }
-
- Table table = analyzer.tableDesc.toTable(conf);
- Path location = table.getDataLocation();
- try {
- location.getFileSystem(conf).mkdirs(location);
- } catch (IOException e) {
- throw new HiveException(e);
- }
- table.setMaterializedTable(true);
-
- LOG.info(cteName + " will be materialized into " + location);
- cte.table = table;
- cte.source = analyzer;
-
- ctx.addMaterializedTable(cteName, table);
-
- return table;
- }
-
-
- static boolean isJoinToken(ASTNode node) {
- if ((node.getToken().getType() == HiveParser.TOK_JOIN)
- || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
- || isOuterJoinToken(node)
- || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
- || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
- return true;
- }
-
- return false;
- }
-
- static private boolean isOuterJoinToken(ASTNode node) {
- return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
- || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
- }
-
- /**
- * Given the AST with TOK_JOIN as the root, get all the aliases for the tables
- * or subqueries in the join.
- *
- * @param qb
- * @param join
- * @throws SemanticException
- */
- @SuppressWarnings("nls")
- private void processJoin(QB qb, ASTNode join) throws SemanticException {
- int numChildren = join.getChildCount();
- if ((numChildren != 2) && (numChildren != 3)
- && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
- throw new SemanticException(generateErrorMessage(join,
- "Join with multiple children"));
- }
-
- queryProperties.incrementJoinCount(isOuterJoinToken(join));
- for (int num = 0; num < numChildren; num++) {
- ASTNode child = (ASTNode) join.getChild(num);
- if (child.getToken().getType() == HiveParser.TOK_TABREF) {
- processTable(qb, child);
- } else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
- processSubQuery(qb, child);
- } else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
- queryProperties.setHasPTF(true);
- processPTF(qb, child);
- PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
- String inputAlias = ptfInvocationSpec == null ? null :
- ptfInvocationSpec.getFunction().getAlias();
- if ( inputAlias == null ) {
- throw new SemanticException(generateErrorMessage(child,
- "PTF invocation in a Join must have an alias"));
- }
-
- } else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
- child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
- // SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
- // is not supported. Instead, the lateral view must be in a subquery
- // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
- // JOIN src2 ...
- throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
- .getMsg(join));
- } else if (isJoinToken(child)) {
- processJoin(qb, child);
- }
- }
- }
-
- /**
- * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
- * table or subquery in the lateral view and also make a mapping from the
- * alias to all the lateral view AST's.
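- * For example (illustrative), for "FROM src LATERAL VIEW explode(a) t AS c" the returned
- * alias is "src", and this lateral view AST is recorded against that alias.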
- *
- * @param qb
- * @param lateralView
- * @return the alias for the table/subquery
- * @throws SemanticException
- */
-
- private String processLateralView(QB qb, ASTNode lateralView)
- throws SemanticException {
- int numChildren = lateralView.getChildCount();
-
- assert (numChildren == 2);
- ASTNode next = (ASTNode) lateralView.getChild(1);
-
- String alias = null;
-
- switch (next.getToken().getType()) {
- case HiveParser.TOK_TABREF:
- alias = processTable(qb, next);
- break;
- case HiveParser.TOK_SUBQUERY:
- alias = processSubQuery(qb, next);
- break;
- case HiveParser.TOK_LATERAL_VIEW:
- case HiveParser.TOK_LATERAL_VIEW_OUTER:
- alias = processLateralView(qb, next);
- break;
- default:
- throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
- .getMsg(lateralView));
- }
- alias = alias.toLowerCase();
- qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
- qb.addAlias(alias);
- return alias;
- }
-
- /**
- * Phase 1: (including, but not limited to):
- *
- * 1. Gets all the aliases for all the tables / subqueries and makes the
- *    appropriate mapping in aliasToTabs, aliasToSubq
- * 2. Gets the location of the destination and names the clause "insclause-" + i
- * 3. Creates a map from a string representation of an aggregation tree to the
- *    actual aggregation AST
- * 4. Creates a mapping from the clause name to the select expression AST in
- *    destToSelExpr
- * 5. Creates a mapping from a table alias to the lateral view AST's in
- *    aliasToLateralViews
- *
- * @param ast
- * @param qb
- * @param ctx_1
- * @throws SemanticException
- */
- @SuppressWarnings({"fallthrough", "nls"})
- public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
- throws SemanticException {
-
- boolean phase1Result = true;
- QBParseInfo qbp = qb.getParseInfo();
- boolean skipRecursion = false;
-
- if (ast.getToken() != null) {
- skipRecursion = true;
- switch (ast.getToken().getType()) {
- case HiveParser.TOK_SELECTDI:
- qb.countSelDi();
- // fall through
- case HiveParser.TOK_SELECT:
- qb.countSel();
- qbp.setSelExprForClause(ctx_1.dest, ast);
-
- int posn = 0;
- if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
- qbp.setHints((ASTNode) ast.getChild(0));
- posn++;
- }
-
- if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
- queryProperties.setUsesScript(true);
-
- LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
- qb, ctx_1.dest);
- doPhase1GetColumnAliasesFromSelect(ast, qbp);
- qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
- qbp.setDistinctFuncExprsForClause(ctx_1.dest,
- doPhase1GetDistinctFuncExprs(aggregations));
- break;
-
- case HiveParser.TOK_WHERE:
- qbp.setWhrExprForClause(ctx_1.dest, ast);
- if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
- queryProperties.setFilterWithSubQuery(true);
- break;
-
- case HiveParser.TOK_INSERT_INTO:
- String currentDatabase = SessionState.get().getCurrentDatabase();
- String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
- qbp.addInsertIntoTable(tab_name, ast);
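- // fall through to TOK_DESTINATION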
-
- case HiveParser.TOK_DESTINATION:
- ctx_1.dest = "insclause-" + ctx_1.nextNum;
- ctx_1.nextNum++;
- boolean isTmpFileDest = false;
- if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
- ASTNode ch = (ASTNode) ast.getChild(0);
- if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
- && ch.getChild(0) instanceof ASTNode) {
- ch = (ASTNode) ch.getChild(0);
- isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
- } else {
- if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
- && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
- String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
- SessionState.get().getCurrentDatabase());
- qbp.getInsertOverwriteTables().put(fullTableName, ast);
- }
- }
- }
-
- // is there an insert in the subquery?
- if (qbp.getIsSubQ() && !isTmpFileDest) {
- throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
- }
-
- if (plannerCtx != null) {
- plannerCtx.setInsertToken(ast, isTmpFileDest);
- }
-
- qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
- handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
- if (qbp.getClauseNamesForDest().size() > 1) {
- queryProperties.setMultiDestQuery(true);
- }
- break;
-
- case HiveParser.TOK_FROM:
- int child_count = ast.getChildCount();
- if (child_count != 1) {
- throw new SemanticException(generateErrorMessage(ast,
- "Multiple Children " + child_count));
- }
-
- // Check if this is a subquery / lateral view
- ASTNode frm = (ASTNode) ast.getChild(0);
- if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
- processTable(qb, frm);
- } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
- // Create a temp table with the passed values in it, then rewrite this portion of the
- // tree to read from that table.
- ASTNode newFrom = genValuesTempTable(frm, qb);
- ast.setChild(0, newFrom);
- processTable(qb, newFrom);
- } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
- processSubQuery(qb, frm);
- } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
- frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
- queryProperties.setHasLateralViews(true);
- processLateralView(qb, frm);
- } else if (isJoinToken(frm)) {
- processJoin(qb, frm);
- qbp.setJoinExpr(frm);
- } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
- queryProperties.setHasPTF(true);
- processPTF(qb, frm);
- }
- break;
-
- case HiveParser.TOK_CLUSTERBY:
- // Get the clusterby aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasClusterBy(true);
- qbp.setClusterByExprForClause(ctx_1.dest, ast);
- break;
-
- case HiveParser.TOK_DISTRIBUTEBY:
- // Get the distribute by aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasDistributeBy(true);
- qbp.setDistributeByExprForClause(ctx_1.dest, ast);
- if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
- } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
- }
- break;
-
- case HiveParser.TOK_SORTBY:
- // Get the sort by aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasSortBy(true);
- qbp.setSortByExprForClause(ctx_1.dest, ast);
- if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
- } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
- }
-
- break;
-
- case HiveParser.TOK_ORDERBY:
- // Get the order by aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasOrderBy(true);
- qbp.setOrderByExprForClause(ctx_1.dest, ast);
- if (qbp.getClusterByForClause(ctx_1.dest) != null) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
- }
- break;
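-
- /*
- * A minimal illustration of the conflict checks above (table t and column a
- * are hypothetical):
- *
- * SELECT * FROM t CLUSTER BY a ORDER BY a; -- rejected (CLUSTERBY_ORDERBY_CONFLICT)
- * SELECT * FROM t DISTRIBUTE BY a SORT BY a; -- accepted: this pair is the legal
- * decomposition of CLUSTER BY a
- */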
-
- case HiveParser.TOK_GROUPBY:
- case HiveParser.TOK_ROLLUP_GROUPBY:
- case HiveParser.TOK_CUBE_GROUPBY:
- case HiveParser.TOK_GROUPING_SETS:
- // Get the groupby aliases - these are aliased to the entries in the
- // select list
- queryProperties.setHasGroupBy(true);
- if (qbp.getJoinExpr() != null) {
- queryProperties.setHasJoinFollowedByGroupBy(true);
- }
- if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
- }
- qbp.setGroupByExprForClause(ctx_1.dest, ast);
- skipRecursion = true;
-
- // Rollup and Cubes are syntactic sugar on top of grouping sets
- if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
- qbp.getDestRollups().add(ctx_1.dest);
- } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
- qbp.getDestCubes().add(ctx_1.dest);
- } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
- qbp.getDestGroupingSets().add(ctx_1.dest);
- }
- break;
-
- case HiveParser.TOK_HAVING:
- qbp.setHavingExprForClause(ctx_1.dest, ast);
- qbp.addAggregationExprsForClause(ctx_1.dest,
- doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
- break;
-
- case HiveParser.KW_WINDOW:
- if (!qb.hasWindowingSpec(ctx_1.dest) ) {
- throw new SemanticException(generateErrorMessage(ast,
- "Query has no Cluster/Distribute By; but has a Window definition"));
- }
- handleQueryWindowClauses(qb, ctx_1, ast);
- break;
-
- case HiveParser.TOK_LIMIT:
- if (ast.getChildCount() == 2) {
- qbp.setDestLimit(ctx_1.dest,
- new Integer(ast.getChild(0).getText()),
- new Integer(ast.getChild(1).getText()));
- } else {
- qbp.setDestLimit(ctx_1.dest, new Integer(0),
- new Integer(ast.getChild(0).getText()));
- }
- break;
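-
- // With two children, the first is the offset: e.g. "SELECT * FROM t LIMIT 5,10"
- // (t hypothetical) is stored as offset 5 and limit 10; with a single child the
- // offset defaults to 0, as coded above.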
-
- case HiveParser.TOK_ANALYZE:
- // Case of analyze command
-
- String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
-
- qb.setTabAlias(table_name, table_name);
- qb.addAlias(table_name);
- qb.getParseInfo().setIsAnalyzeCommand(true);
- qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
- qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
- // Allow analyzing the whole table and dynamic partitions
- HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
- HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
-
- break;
-
- case HiveParser.TOK_UNIONALL:
- if (!qbp.getIsSubQ()) {
- // this shouldn't happen. The parser should have converted the union to be
- // contained in a subquery. Just in case, we keep the error as a fallback.
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
- }
- skipRecursion = false;
- break;
-
- case HiveParser.TOK_INSERT:
- ASTNode destination = (ASTNode) ast.getChild(0);
- Tree tab = destination.getChild(0);
-
- // Proceed only if the AST contains a partition spec and IF NOT EXISTS
- if (destination.getChildCount() == 2 &&
- tab.getChildCount() == 2 &&
- destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
- String tableName = tab.getChild(0).getChild(0).getText();
-
- Tree partitions = tab.getChild(1);
- int childCount = partitions.getChildCount();
- HashMap<String, String> partition = new HashMap<String, String>();
- for (int i = 0; i < childCount; i++) {
- String partitionName = partitions.getChild(i).getChild(0).getText();
- Tree pvalue = partitions.getChild(i).getChild(1);
- if (pvalue == null) {
- break;
- }
- String partitionVal = stripQuotes(pvalue.getText());
- partition.put(partitionName, partitionVal);
- }
- // if it is a dynamic partition, throw an exception
- if (childCount != partition.size()) {
- throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
- .getMsg(partition.toString()));
- }
- Table table = null;
- try {
- table = this.getTableObjectByName(tableName);
- } catch (HiveException ex) {
- throw new SemanticException(ex);
- }
- try {
- Partition parMetaData = db.getPartition(table, partition, false);
- // Check whether the partition exists; if it does, skip the overwrite
- if (parMetaData != null) {
- phase1Result = false;
- skipRecursion = true;
- LOG.info("Partition already exists so insert into overwrite " +
- "skipped for partition : " + parMetaData.toString());
- break;
- }
- } catch (HiveException e) {
- LOG.info("Error while getting metadata : ", e);
- }
- validatePartSpec(table, partition, (ASTNode)tab, conf, false);
- }
- skipRecursion = false;
- break;
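- /*
- * Sketch of the IF NOT EXISTS path above (table p is hypothetical):
- *
- * CREATE TABLE p (x INT) PARTITIONED BY (ds STRING);
- * INSERT OVERWRITE TABLE p PARTITION (ds='1') IF NOT EXISTS SELECT ...;
- *
- * If partition ds='1' already exists, phase 1 returns false and the overwrite
- * is skipped; a dynamic partition spec combined with IF NOT EXISTS is rejected.
- */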
- case HiveParser.TOK_LATERAL_VIEW:
- case HiveParser.TOK_LATERAL_VIEW_OUTER:
- // todo: nested LV
- assert ast.getChildCount() == 1;
- qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
- break;
- case HiveParser.TOK_CTE:
- processCTE(qb, ast);
- break;
- default:
- skipRecursion = false;
- break;
- }
- }
-
- if (!skipRecursion) {
- // Iterate over the rest of the children
- int child_count = ast.getChildCount();
- for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
- // Recurse
- phase1Result = phase1Result && doPhase1(
- (ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
- }
- }
- return phase1Result;
- }
-
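- /*
- * What phase 1 records for a small multi-insert query (tables src, dest1 and
- * dest2 are hypothetical):
- *
- * FROM src
- * INSERT OVERWRITE TABLE dest1 SELECT key WHERE key < 100
- * INSERT OVERWRITE TABLE dest2 SELECT key WHERE key >= 100;
- *
- * The two destinations are named "insclause-0" and "insclause-1", and those
- * names key the per-clause maps in QBParseInfo (destToSelExpr and friends).
- */
-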
- /**
- * This is phase 1 of support for specifying a schema in an insert statement, e.g.
- * insert into foo(z,y) select a,b from bar;
- * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
- * @throws SemanticException
- */
- private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
- ASTNode tabColName = (ASTNode)ast.getChild(1);
- if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
- //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
- List<String> targetColNames = new ArrayList<String>();
- for(Node col : tabColName.getChildren()) {
- assert ((ASTNode)col).getType() == HiveParser.Identifier :
- "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
- targetColNames.add(((ASTNode)col).getText());
- }
- String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
- SessionState.get().getCurrentDatabase());
- qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
- Set<String> targetColumns = new HashSet<String>();
- targetColumns.addAll(targetColNames);
- if(targetColNames.size() != targetColumns.size()) {
- throw new SemanticException(generateErrorMessage(tabColName,
- "Duplicate column name detected in " + fullTableName + " table schema specification"));
- }
- Table targetTable = null;
- try {
- targetTable = db.getTable(fullTableName, false);
- }
- catch (HiveException ex) {
- LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
- throw new SemanticException(ex);
- }
- if(targetTable == null) {
- throw new SemanticException(generateErrorMessage(ast,
- "Unable to access metadata for table " + fullTableName));
- }
- for(FieldSchema f : targetTable.getCols()) {
- //parser only allows foo(a,b), not foo(foo.a, foo.b)
- targetColumns.remove(f.getName());
- }
- if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns
- /* We just checked the user specified schema columns against the regular table columns and found some which are not
- 'regular'. Now check whether they are dynamic partition columns.
- For dynamic partitioning,
- Given "create table multipart(a int, b int) partitioned by (c int, d int);"
- for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this
- (TOK_INSERT_INTO
- (TOK_TAB
- (TOK_TABNAME multipart)
- (TOK_PARTSPEC
- (TOK_PARTVAL c '1')
- (TOK_PARTVAL d)
- )
- )
- (TOK_TABCOLNAME d a)
- )*/
- List<String> dynamicPartitionColumns = new ArrayList<String>();
- if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
- ASTNode tokTab = (ASTNode)ast.getChild(0);
- ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
- if(tokPartSpec != null) {
- for(Node n : tokPartSpec.getChildren()) {
- ASTNode tokPartVal = null;
- if(n instanceof ASTNode) {
- tokPartVal = (ASTNode)n;
- }
- if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) {
- assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
- "Expected column name; found tokType=" + tokPartVal.getType();
- dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
- }
- }
- }
- }
- for(String colName : dynamicPartitionColumns) {
- targetColumns.remove(colName);
- }
- if(!targetColumns.isEmpty()) {
- //Found some columns in the user specified schema which are neither regular nor dynamic partition columns
- throw new SemanticException(generateErrorMessage(tabColName,
- "'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
- "' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
- " not found among regular columns of " +
- fullTableName + " nor dynamic partition columns."));
- }
- }
- }
- }
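-
- /*
- * A minimal sketch of the checks above (table bar is hypothetical):
- *
- * CREATE TABLE bar (a INT, b INT);
- * INSERT INTO bar(a, a) VALUES (1, 2); -- rejected: duplicate column in the spec
- * INSERT INTO bar(a, c) VALUES (1, 2); -- rejected: 'c' is neither a regular
- * column nor a dynamic partition column
- */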
-
- public void getMaterializationMetadata(QB qb) throws SemanticException {
- try {
- gatherCTEReferences(qb, rootClause);
- int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
- for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
- if (threshold >= 0 && cte.reference >= threshold) {
- cte.materialize = true;
- }
- }
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- if (e instanceof SemanticException) {
- throw (SemanticException)e;
- }
- throw new SemanticException(e.getMessage(), e);
- }
- }
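-
- /*
- * Sketch of the threshold above, assuming HIVE_CTE_MATERIALIZE_THRESHOLD is
- * backed by the hive.optimize.cte.materialize.threshold property:
- *
- * SET hive.optimize.cte.materialize.threshold=2;
- * WITH q AS (SELECT key FROM src WHERE key < 10)
- * SELECT * FROM q a JOIN q b ON a.key = b.key; -- q is referenced twice and
- * is therefore materialized
- */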
-
- private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
- if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
- gatherCTEReferences(qbexpr.getQB(), parent);
- } else {
- gatherCTEReferences(qbexpr.getQBExpr1(), parent);
- gatherCTEReferences(qbexpr.getQBExpr2(), parent);
- }
- }
-
- // TODO: check view references, too
- private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
- for (String alias : qb.getTabAliases()) {
- String tabName = qb.getTabNameForAlias(alias);
- String cteName = tabName.toLowerCase();
-
- CTEClause cte = findCTEFromName(qb, cteName);
- if (cte != null) {
- if (ctesExpanded.contains(cteName)) {
- throw new SemanticException("Recursive cte " + cteName +
- " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
- " -> " + cteName + ").");
- }
- cte.reference++;
- current.parents.add(cte);
- if (cte.qbExpr != null) {
- continue;
- }
- cte.qbExpr = new QBExpr(cteName);
- doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
-
- ctesExpanded.add(cteName);
- gatherCTEReferences(cte.qbExpr, cte);
- ctesExpanded.remove(ctesExpanded.size() - 1);
- }
- }
- for (String alias : qb.getSubqAliases()) {
- gatherCTEReferences(qb.getSubqForAlias(alias), current);
- }
- }
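-
- /*
- * Example of the cycle detection above: a CTE that refers to itself, e.g.
- * "WITH q AS (SELECT * FROM q) SELECT * FROM q", fails with
- * "Recursive cte q detected (cycle: q -> q)."
- */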
-
- public void getMetaData(QB qb) throws SemanticException {
- getMetaData(qb, false);
- }
-
- public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
- try {
- if (enableMaterialization) {
- getMaterializationMetadata(qb);
- }
- getMetaData(qb, null);
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- if (e instanceof SemanticException) {
- throw (SemanticException)e;
- }
- throw new SemanticException(e.getMessage(), e);
- }
- }
-
- private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
- throws HiveException {
- if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
- getMetaData(qbexpr.getQB(), parentInput);
- } else {
- getMetaData(qbexpr.getQBExpr1(), parentInput);
- getMetaData(qbexpr.getQBExpr2(), parentInput);
- }
- }
-
- @SuppressWarnings("nls")
- private void getMetaData(QB qb, ReadEntity parentInput)
- throws HiveException {
- LOG.info("Get metadata for source tables");
-
- // Go over the tables and populate the related structures.
- // We have to materialize the table alias list since we might
- // modify it in the middle for view rewrite.
- List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
-
- // Keep track of view alias to view name and read entity
- // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
- // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
- // This is needed for tracking the dependencies for inputs, along with their parents.
- Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
- new HashMap<String, ObjectPair<String, ReadEntity>>();
-
- /*
- * used to capture view to SQ conversions. This is used to check for
- * recursive CTE invocations.
- */
- Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
-
- for (String alias : tabAliases) {
- String tabName = qb.getTabNameForAlias(alias);
- String cteName = tabName.toLowerCase();
-
- Table tab = db.getTable(tabName, false);
- if (tab == null ||
- tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
- Table materializedTab = ctx.getMaterializedTable(cteName);
- if (materializedTab == null) {
- // we first look for this alias among the CTEs, and then in the catalog.
- CTEClause cte = findCTEFromName(qb, cteName);
- if (cte != null) {
- if (!cte.materialize) {
- addCTEAsSubQuery(qb, cteName, alias);
- sqAliasToCTEName.put(alias, cteName);
- continue;
- }
- tab = materializeCTE(cteName, cte);
- }
- } else {
- tab = materializedTab;
- }
- }
-
- if (tab == null) {
- ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
- if (null != src) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
- } else {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
- }
- }
-
- // Disallow INSERT INTO on bucketized tables
- boolean isAcid = AcidUtils.isAcidTable(tab);
- boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName());
- if (isTableWrittenTo &&
- tab.getNumBuckets() > 0 && !isAcid) {
- throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
- getMsg("Table: " + tabName));
- }
- // Disallow update and delete on non-acid tables
- if ((updating() || deleting()) && !isAcid && isTableWrittenTo) {
- //isTableWrittenTo: delete from acidTbl where a in (select id from nonAcidTable)
- //so only assert this if we are actually writing to this table
- // Whether we are using an acid compliant transaction manager has already been caught in
- // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
- // here, it means the table itself doesn't support it.
- throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, tabName);
- }
-
- if (tab.isView()) {
- if (qb.getParseInfo().isAnalyzeCommand()) {
- throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
- }
- String fullViewName = tab.getDbName() + "." + tab.getTableName();
- // Prevent view cycles
- if (viewsExpanded.contains(fullViewName)) {
- throw new SemanticException("Recursive view " + fullViewName +
- " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
- " -> " + fullViewName + ").");
- }
- replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
- // This is the last time we'll see this view's Table object, so add it to the inputs
- // now. isInsideView tells whether this view is embedded in another view.
- ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
- viewInput = PlanUtils.addInput(inputs, viewInput);
- aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
- String aliasId = getAliasId(alias, qb);
- if (aliasId != null) {
- aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
- .replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
- }
- viewAliasToInput.put(aliasId, viewInput);
- continue;
- }
-
- if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
- throw new SemanticException(generateErrorMessage(
- qb.getParseInfo().getSrcForAlias(alias),
- ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
- }
-
- qb.getMetaData().setSrcForAlias(alias, tab);
-
- if (qb.getParseInfo().isAnalyzeCommand()) {
- // allow partial partition specification for noscan, since noscan is fast.
- TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
- if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
- try {
- ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
- } catch (HiveException e) {
- throw new SemanticException(generateErrorMessage(
- qb.getParseInfo().getSrcForAlias(alias),
- "Cannot get partitions for " + ts.partSpec), e);
- }
- }
- // validate partial scan command
- QBParseInfo qbpi = qb.getParseInfo();
- if (qbpi.isPartialScanAnalyzeCommand()) {
- Class<? extends InputFormat> inputFormatClass = null;
- switch (ts.specType) {
- case TABLE_ONLY:
- case DYNAMIC_PARTITION:
- inputFormatClass = ts.tableHandle.getInputFormatClass();
- break;
- case STATIC_PARTITION:
- inputFormatClass = ts.partHandle.getInputFormatClass();
- break;
- default:
- assert false;
- }
- // throw a SemanticException for formats other than RCFile or ORC.
- if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
- .equals(OrcInputFormat.class))) {
- throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
- }
- }
-
- tab.setTableSpec(ts);
- qb.getParseInfo().addTableSpec(alias, ts);
- }
-
- ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
- PlanUtils.addInput(inputs,
- new ReadEntity(tab, parentViewInfo, parentViewInfo == null),mergeIsDirect);
- }
-
- LOG.info("Get metadata for subqueries");
- // Go over the subqueries and getMetaData for these
- for (String alias : qb.getSubqAliases()) {
- boolean wasView = aliasToViewInfo.containsKey(alias);
- boolean wasCTE = sqAliasToCTEName.containsKey(alias);
- ReadEntity newParentInput = null;
- if (wasView) {
- viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
- newParentInput = aliasToViewInfo.get(alias).getSecond();
- } else if (wasCTE) {
- ctesExpanded.add(sqAliasToCTEName.get(alias));
- }
- QBExpr qbexpr = qb.getSubqForAlias(alias);
- getMetaData(qbexpr, newParentInput);
- if (wasView) {
- viewsExpanded.remove(viewsExpanded.size() - 1);
- } else if (wasCTE) {
- ctesExpanded.remove(ctesExpanded.size() - 1);
- }
- }
-
- RowFormatParams rowFormatParams = new RowFormatParams();
- StorageFormat storageFormat = new StorageFormat(conf);
-
- LOG.info("Get metadata for destination tables");
- // Go over all the destination structures and populate the related
- // metadata
- QBParseInfo qbp = qb.getParseInfo();
-
- for (String name : qbp.getClauseNamesForDest()) {
- ASTNode ast = qbp.getDestForClause(name);
- switch (ast.getToken().getType()) {
- case HiveParser.TOK_TAB: {
- TableSpec ts = new TableSpec(db, conf, ast);
- if (ts.tableHandle.isView()) {
- throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
- }
-
- Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
- if (!ts.tableHandle.isNonNative() &&
- !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
- throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
- .getMsg(ast, "The class is " + outputFormatClass.toString()));
- }
-
- // TableSpec ts is obtained from the query (user specified),
- // which means the user didn't specify partitions in their query,
- // but whether the table itself is partitioned is not known.
- if (ts.specType != SpecType.STATIC_PARTITION) {
- // This is a table or dynamic partition
- qb.getMetaData().setDestForAlias(name, ts.tableHandle);
- // has dynamic as well as static partitions
- if (ts.partSpec != null && ts.partSpec.size() > 0) {
- qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
- }
- } else {
- // This is a partition
- qb.getMetaData().setDestForAlias(name, ts.partHandle);
- }
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
- // Add the table spec for the destination table.
- qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
- }
- break;
- }
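-
- /*
- * e.g. "INSERT OVERWRITE TABLE t PARTITION (ds='1', hr) SELECT ..." (t is
- * hypothetical) mixes static and dynamic partitions: specType is not
- * STATIC_PARTITION, so the table handle becomes the destination and the
- * partial partSpec is recorded for the clause.
- */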
-
- case HiveParser.TOK_DIR: {
- // This is a dfs file
- String fname = stripQuotes(ast.getChild(0).getText());
- if ((!qb.getParseInfo().getIsSubQ())
- && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
-
- if (qb.isCTAS()) {
- qb.setIsQuery(false);
- ctx.setResDir(null);
- ctx.setResFile(null);
-
- // allocate
<TRUNCATED>
[2/2] hive git commit: HIVE-14728: Redundant orig files (Rui reviewed by Pengcheng)
Posted by li...@apache.org.
HIVE-14728: Redundant orig files (Rui reviewed by Pengcheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc75e46a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc75e46a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc75e46a
Branch: refs/heads/master
Commit: bc75e46adabd3e8b4738da985291900631b8509d
Parents: 407cfe1
Author: Rui Li <sh...@cn.ibm.com>
Authored: Sat Sep 10 14:33:15 2016 +0800
Committer: Rui Li <sh...@cn.ibm.com>
Committed: Sat Sep 10 14:33:15 2016 +0800
----------------------------------------------------------------------
.../resources/testconfiguration.properties.orig | 1377 --
.../org/apache/hadoop/hive/ql/Context.java.orig | 829 --
.../hive/ql/parse/SemanticAnalyzer.java.orig | 13038 -----------------
3 files changed, 15244 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bc75e46a/itests/src/test/resources/testconfiguration.properties.orig
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties.orig b/itests/src/test/resources/testconfiguration.properties.orig
deleted file mode 100644
index a920ca9..0000000
--- a/itests/src/test/resources/testconfiguration.properties.orig
+++ /dev/null
@@ -1,1377 +0,0 @@
-# NOTE: files should be listed in alphabetical order
-minimr.query.files=auto_sortmerge_join_16.q,\
- bucket4.q,\
- bucket5.q,\
- bucket6.q,\
- bucket_many.q,\
- bucket_num_reducers.q,\
- bucket_num_reducers2.q,\
- bucketizedhiveinputformat.q,\
- bucketmapjoin6.q,\
- bucketmapjoin7.q,\
- disable_merge_for_bucketing.q,\
- empty_dir_in_table.q,\
- exchgpartition2lel.q,\
- external_table_with_space_in_location_path.q,\
- file_with_header_footer.q,\
- groupby2.q,\
- import_exported_table.q,\
- index_bitmap3.q,\
- index_bitmap_auto.q,\
- infer_bucket_sort_bucketed_table.q,\
- infer_bucket_sort_dyn_part.q,\
- infer_bucket_sort_map_operators.q,\
- infer_bucket_sort_merge.q,\
- infer_bucket_sort_num_buckets.q,\
- infer_bucket_sort_reducers_power_two.q,\
- input16_cc.q,\
- insert_dir_distcp.q,\
- join1.q,\
- join_acid_non_acid.q,\
- leftsemijoin_mr.q,\
- list_bucket_dml_10.q,\
- load_fs2.q,\
- load_hdfs_file_with_space_in_the_name.q,\
- non_native_window_udf.q,\
- parallel_orderby.q,\
- quotedid_smb.q,\
- reduce_deduplicate.q,\
- remote_script.q,\
- root_dir_external_table.q,\
- schemeAuthority.q,\
- schemeAuthority2.q,\
- scriptfile1.q,\
- scriptfile1_win.q,\
- skewjoin_onesideskew.q,\
- table_nonprintable.q,\
- temp_table_external.q,\
- truncate_column_buckets.q,\
- uber_reduce.q,\
- udf_using.q
-
-# These tests are disabled for minimr
-# ql_rewrite_gbtoidx.q,\
-# ql_rewrite_gbtoidx_cbo_1.q,\
-# ql_rewrite_gbtoidx_cbo_2.q,\
-# smb_mapjoin_8.q,\
-
-
-# Tests that are not enabled for CLI Driver
-disabled.query.files=ql_rewrite_gbtoidx.q,\
- ql_rewrite_gbtoidx_cbo_1.q,\
- ql_rewrite_gbtoidx_cbo_2.q,\
- rcfile_merge1.q,\
- smb_mapjoin_8.q
-
-# NOTE: Add tests to minitez only if it is very
-# specific to tez and cannot be added to minillap.
-minitez.query.files.shared=delete_orig_table.q,\
- orc_merge12.q,\
- orc_vectorization_ppd.q,\
- unionDistinct_2.q,\
- update_orig_table.q,\
- vector_join_part_col_char.q,\
- vector_non_string_partition.q,\
- vectorization_div0.q,\
- vectorization_limit.q
-
-# NOTE: Add tests to minitez only if it is very
-# specific to tez and cannot be added to minillap.
-minitez.query.files=explainuser_3.q,\
- explainanalyze_1.q,\
- explainanalyze_2.q,\
- explainanalyze_3.q,\
- explainanalyze_4.q,\
- explainanalyze_5.q,\
- hybridgrace_hashjoin_1.q,\
- hybridgrace_hashjoin_2.q,\
- partition_column_names_with_leading_and_trailing_spaces.q,\
- stats_filemetadata.q,\
- tez_union_with_udf.q
-
-minillap.shared.query.files=acid_globallimit.q,\
- alter_merge_2_orc.q,\
- alter_merge_orc.q,\
- alter_merge_stats_orc.q,\
- auto_join0.q,\
- auto_join1.q,\
- auto_join21.q,\
- auto_join29.q,\
- auto_join30.q,\
- auto_join_filters.q,\
- auto_join_nulls.q,\
- auto_sortmerge_join_1.q,\
- auto_sortmerge_join_10.q,\
- auto_sortmerge_join_11.q,\
- auto_sortmerge_join_12.q,\
- auto_sortmerge_join_13.q,\
- auto_sortmerge_join_14.q,\
- auto_sortmerge_join_15.q,\
- auto_sortmerge_join_16.q,\
- auto_sortmerge_join_2.q,\
- auto_sortmerge_join_3.q,\
- auto_sortmerge_join_4.q,\
- auto_sortmerge_join_5.q,\
- auto_sortmerge_join_6.q,\
- auto_sortmerge_join_7.q,\
- auto_sortmerge_join_8.q,\
- auto_sortmerge_join_9.q,\
- bucket2.q,\
- bucket3.q,\
- bucket4.q,\
- bucket_map_join_tez1.q,\
- bucket_map_join_tez2.q,\
- cbo_gby.q,\
- cbo_gby_empty.q,\
- cbo_join.q,\
- cbo_limit.q,\
- cbo_semijoin.q,\
- cbo_simple_select.q,\
- cbo_stats.q,\
- cbo_subq_exists.q,\
- cbo_subq_in.q,\
- cbo_subq_not_in.q,\
- cbo_udf_udaf.q,\
- cbo_union.q,\
- cbo_views.q,\
- cbo_windowing.q,\
- column_names_with_leading_and_trailing_spaces.q,\
- constprog_dpp.q,\
- constprog_semijoin.q,\
- correlationoptimizer1.q,\
- count.q,\
- create_merge_compressed.q,\
- cross_join.q,\
- cross_product_check_1.q,\
- cross_product_check_2.q,\
- ctas.q,\
- cte_1.q,\
- cte_2.q,\
- cte_3.q,\
- cte_4.q,\
- cte_5.q,\
- cte_mat_1.q,\
- cte_mat_2.q,\
- cte_mat_3.q,\
- cte_mat_4.q,\
- cte_mat_5.q,\
- custom_input_output_format.q,\
- deleteAnalyze.q,\
- delete_all_non_partitioned.q,\
- delete_all_partitioned.q,\
- delete_tmp_table.q,\
- delete_where_no_match.q,\
- delete_where_non_partitioned.q,\
- delete_where_partitioned.q,\
- delete_whole_partition.q,\
- disable_merge_for_bucketing.q,\
- dynamic_partition_pruning.q,\
- dynamic_partition_pruning_2.q,\
- dynpart_sort_opt_vectorization.q,\
- dynpart_sort_optimization.q,\
- dynpart_sort_optimization2.q,\
- empty_join.q,\
- enforce_order.q,\
- filter_join_breaktask.q,\
- filter_join_breaktask2.q,\
- groupby1.q,\
- groupby2.q,\
- groupby3.q,\
- having.q,\
- identity_project_remove_skip.q,\
- insert1.q,\
- insert_into1.q,\
- insert_into2.q,\
- insert_orig_table.q,\
- insert_update_delete.q,\
- insert_values_dynamic_partitioned.q,\
- insert_values_non_partitioned.q,\
- insert_values_orig_table.q,\
- insert_values_partitioned.q,\
- insert_values_tmp_table.q,\
- join0.q,\
- join1.q,\
- join_nullsafe.q,\
- leftsemijoin.q,\
- limit_pushdown.q,\
- llap_nullscan.q,\
- llapdecider.q,\
- load_dyn_part1.q,\
- load_dyn_part2.q,\
- load_dyn_part3.q,\
- lvj_mapjoin.q,\
- mapjoin2.q,\
- mapjoin_decimal.q,\
- mapjoin_mapjoin.q,\
- mapreduce1.q,\
- mapreduce2.q,\
- merge1.q,\
- merge2.q,\
- mergejoin.q,\
- metadata_only_queries.q,\
- metadata_only_queries_with_filters.q,\
- metadataonly1.q,\
- mrr.q,\
- nonmr_fetch_threshold.q,\
- optimize_nullscan.q,\
- orc_analyze.q,\
- orc_merge1.q,\
- orc_merge10.q,\
- orc_merge11.q,\
- orc_merge2.q,\
- orc_merge3.q,\
- orc_merge4.q,\
- orc_merge5.q,\
- orc_merge6.q,\
- orc_merge7.q,\
- orc_merge8.q,\
- orc_merge9.q,\
- orc_merge_diff_fs.q,\
- orc_merge_incompat1.q,\
- orc_merge_incompat2.q,\
- orc_merge_incompat3.q,\
- orc_ppd_basic.q,\
- orc_ppd_schema_evol_1a.q,\
- orc_ppd_schema_evol_1b.q,\
- orc_ppd_schema_evol_2a.q,\
- orc_ppd_schema_evol_2b.q,\
- orc_ppd_schema_evol_3a.q,\
- order_null.q,\
- parallel.q,\
- ptf.q,\
- ptf_matchpath.q,\
- ptf_streaming.q,\
- sample1.q,\
- script_env_var1.q,\
- script_env_var2.q,\
- script_pipe.q,\
- scriptfile1.q,\
- selectDistinctStar.q,\
- select_dummy_source.q,\
- skewjoin.q,\
- stats_noscan_1.q,\
- stats_only_null.q,\
- subquery_exists.q,\
- subquery_in.q,\
- temp_table.q,\
- tez_bmj_schema_evolution.q,\
- tez_dml.q,\
- tez_dynpart_hashjoin_1.q,\
- tez_dynpart_hashjoin_2.q,\
- tez_fsstat.q,\
- tez_insert_overwrite_local_directory_1.q,\
- tez_join.q,\
- tez_join_hash.q,\
- tez_join_result_complex.q,\
- tez_join_tests.q,\
- tez_joins_explain.q,\
- tez_multi_union.q,\
- tez_schema_evolution.q,\
- tez_self_join.q,\
- tez_smb_1.q,\
- tez_smb_main.q,\
- tez_union.q,\
- tez_union2.q,\
- tez_union_decimal.q,\
- tez_union_dynamic_partition.q,\
- tez_union_group_by.q,\
- tez_union_multiinsert.q,\
- tez_union_view.q,\
- tez_vector_dynpart_hashjoin_1.q,\
- tez_vector_dynpart_hashjoin_2.q,\
- transform1.q,\
- transform2.q,\
- transform_ppr1.q,\
- transform_ppr2.q,\
- union2.q,\
- union3.q,\
- union4.q,\
- union5.q,\
- union6.q,\
- union7.q,\
- union8.q,\
- union9.q,\
- unionDistinct_1.q,\
- union_fast_stats.q,\
- union_stats.q,\
- union_type_chk.q,\
- update_after_multiple_inserts.q,\
- update_all_non_partitioned.q,\
- update_all_partitioned.q,\
- update_all_types.q,\
- update_tmp_table.q,\
- update_two_cols.q,\
- update_where_no_match.q,\
- update_where_non_partitioned.q,\
- update_where_partitioned.q,\
- vector_acid3.q,\
- vector_aggregate_9.q,\
- vector_aggregate_without_gby.q,\
- vector_auto_smb_mapjoin_14.q,\
- vector_between_columns.q,\
- vector_between_in.q,\
- vector_binary_join_groupby.q,\
- vector_bround.q,\
- vector_bucket.q,\
- vector_cast_constant.q,\
- vector_char_2.q,\
- vector_char_4.q,\
- vector_char_cast.q,\
- vector_char_mapjoin1.q,\
- vector_char_simple.q,\
- vector_coalesce.q,\
- vector_coalesce_2.q,\
- vector_complex_all.q,\
- vector_complex_join.q,\
- vector_count.q,\
- vector_count_distinct.q,\
- vector_data_types.q,\
- vector_date_1.q,\
- vector_decimal_1.q,\
- vector_decimal_10_0.q,\
- vector_decimal_2.q,\
- vector_decimal_3.q,\
- vector_decimal_4.q,\
- vector_decimal_5.q,\
- vector_decimal_6.q,\
- vector_decimal_aggregate.q,\
- vector_decimal_cast.q,\
- vector_decimal_expressions.q,\
- vector_decimal_mapjoin.q,\
- vector_decimal_math_funcs.q,\
- vector_decimal_precision.q,\
- vector_decimal_round.q,\
- vector_decimal_round_2.q,\
- vector_decimal_trailing.q,\
- vector_decimal_udf.q,\
- vector_decimal_udf2.q,\
- vector_distinct_2.q,\
- vector_elt.q,\
- vector_groupby4.q,\
- vector_groupby6.q,\
- vector_groupby_3.q,\
- vector_groupby_mapjoin.q,\
- vector_groupby_reduce.q,\
- vector_grouping_sets.q,\
- vector_if_expr.q,\
- vector_include_no_sel.q,\
- vector_inner_join.q,\
- vector_interval_1.q,\
- vector_interval_2.q,\
- vector_interval_arithmetic.q,\
- vector_interval_mapjoin.q,\
- vector_join30.q,\
- vector_join_filters.q,\
- vector_join_nulls.q,\
- vector_left_outer_join.q,\
- vector_left_outer_join2.q,\
- vector_leftsemi_mapjoin.q,\
- vector_mapjoin_reduce.q,\
- vector_mr_diff_schema_alias.q,\
- vector_multi_insert.q,\
- vector_null_projection.q,\
- vector_nullsafe_join.q,\
- vector_nvl.q,\
- vector_orderby_5.q,\
- vector_outer_join0.q,\
- vector_outer_join1.q,\
- vector_outer_join2.q,\
- vector_outer_join3.q,\
- vector_outer_join4.q,\
- vector_outer_join5.q,\
- vector_outer_join6.q,\
- vector_partition_diff_num_cols.q,\
- vector_partitioned_date_time.q,\
- vector_reduce1.q,\
- vector_reduce2.q,\
- vector_reduce3.q,\
- vector_reduce_groupby_decimal.q,\
- vector_string_concat.q,\
- vector_struct_in.q,\
- vector_varchar_4.q,\
- vector_varchar_mapjoin1.q,\
- vector_varchar_simple.q,\
- vector_when_case_null.q,\
- vectorization_0.q,\
- vectorization_1.q,\
- vectorization_10.q,\
- vectorization_11.q,\
- vectorization_12.q,\
- vectorization_13.q,\
- vectorization_14.q,\
- vectorization_15.q,\
- vectorization_16.q,\
- vectorization_17.q,\
- vectorization_2.q,\
- vectorization_3.q,\
- vectorization_4.q,\
- vectorization_5.q,\
- vectorization_6.q,\
- vectorization_7.q,\
- vectorization_8.q,\
- vectorization_9.q,\
- vectorization_decimal_date.q,\
- vectorization_nested_udf.q,\
- vectorization_not.q,\
- vectorization_part.q,\
- vectorization_part_project.q,\
- vectorization_part_varchar.q,\
- vectorization_pushdown.q,\
- vectorization_short_regress.q,\
- vectorized_bucketmapjoin1.q,\
- vectorized_case.q,\
- vectorized_casts.q,\
- vectorized_context.q,\
- vectorized_date_funcs.q,\
- vectorized_distinct_gby.q,\
- vectorized_dynamic_partition_pruning.q,\
- vectorized_mapjoin.q,\
- vectorized_math_funcs.q,\
- vectorized_nested_mapjoin.q,\
- vectorized_parquet.q,\
- vectorized_parquet_types.q,\
- vectorized_ptf.q,\
- vectorized_rcfile_columnar.q,\
- vectorized_shufflejoin.q,\
- vectorized_string_funcs.q,\
- vectorized_timestamp.q,\
- vectorized_timestamp_funcs.q,\
- vectorized_timestamp_ints_casts.q
-
-minillap.query.files=acid_bucket_pruning.q,\
- acid_vectorization_missing_cols.q,\
- bucket_map_join_tez1.q,\
- bucket_map_join_tez2.q,\
- bucketpruning1.q,\
- constprog_dpp.q,\
- dynamic_partition_pruning.q,\
- dynamic_partition_pruning_2.q,\
- explainuser_1.q,\
- explainuser_2.q,\
- explainuser_4.q,\
- hybridgrace_hashjoin_1.q,\
- hybridgrace_hashjoin_2.q,\
- llap_nullscan.q,\
- llap_udf.q,\
- llapdecider.q,\
- lvj_mapjoin.q,\
- mapjoin_decimal.q,\
- mergejoin_3way.q,\
- mrr.q,\
- orc_llap.q,\
- orc_llap_counters.q,\
- orc_llap_counters1.q,\
- orc_llap_nonvector.q,\
- orc_ppd_basic.q,\
- schema_evol_orc_acid_part.q,\
- schema_evol_orc_acid_part_update.q,\
- schema_evol_orc_acid_table.q,\
- schema_evol_orc_acid_table_update.q,\
- schema_evol_orc_acidvec_part.q,\
- schema_evol_orc_acidvec_part_update.q,\
- schema_evol_orc_acidvec_table.q,\
- schema_evol_orc_acidvec_table_update.q,\
- schema_evol_orc_nonvec_part.q,\
- schema_evol_orc_nonvec_part_all_complex.q,\
- schema_evol_orc_nonvec_part_all_primitive.q,\
- schema_evol_orc_nonvec_table.q,\
- schema_evol_orc_vec_part.q,\
- schema_evol_orc_vec_part_all_complex.q,\
- schema_evol_orc_vec_part_all_primitive.q,\
- schema_evol_orc_vec_table.q,\
- schema_evol_stats.q,\
- schema_evol_text_nonvec_part.q,\
- schema_evol_text_nonvec_part_all_complex.q,\
- schema_evol_text_nonvec_part_all_primitive.q,\
- schema_evol_text_nonvec_table.q,\
- schema_evol_text_vec_part.q,\
- schema_evol_text_vec_part_all_complex.q,\
- schema_evol_text_vec_part_all_primitive.q,\
- schema_evol_text_vec_table.q,\
- schema_evol_text_vecrow_part.q,\
- schema_evol_text_vecrow_part_all_complex.q,\
- schema_evol_text_vecrow_part_all_primitive.q,\
- schema_evol_text_vecrow_table.q,\
- smb_cache.q,\
- tez_aggr_part_stats.q,\
- tez_bmj_schema_evolution.q,\
- tez_dml.q,\
- tez_dynpart_hashjoin_1.q,\
- tez_dynpart_hashjoin_2.q,\
- tez_dynpart_hashjoin_3.q,\
- tez_fsstat.q,\
- tez_insert_overwrite_local_directory_1.q,\
- tez_join.q,\
- tez_join_result_complex.q,\
- tez_join_tests.q,\
- tez_joins_explain.q,\
- tez_multi_union.q,\
- tez_schema_evolution.q,\
- tez_self_join.q,\
- tez_smb_1.q,\
- tez_smb_empty.q,\
- tez_smb_main.q,\
- tez_union.q,\
- tez_union2.q,\
- tez_union_decimal.q,\
- tez_union_dynamic_partition.q,\
- tez_union_group_by.q,\
- tez_union_multiinsert.q,\
- tez_union_view.q,\
- tez_vector_dynpart_hashjoin_1.q,\
- tez_vector_dynpart_hashjoin_2.q,\
- vectorized_dynamic_partition_pruning.q,\
- windowing_gby.q
-
-encrypted.query.files=encryption_join_unencrypted_tbl.q,\
- encryption_insert_partition_static.q,\
- encryption_insert_partition_dynamic.q,\
- encryption_join_with_different_encryption_keys.q,\
- encryption_select_read_only_encrypted_tbl.q,\
- encryption_select_read_only_unencrypted_tbl.q,\
- encryption_load_data_to_encrypted_tables.q,\
- encryption_unencrypted_nonhdfs_external_tables.q,\
- encryption_move_tbl.q,\
- encryption_drop_table.q,\
- encryption_insert_values.q,\
- encryption_drop_view.q,\
- encryption_drop_partition.q,\
- encryption_with_trash.q,\
- encryption_ctas.q
-
-beeline.positive.exclude=add_part_exist.q,\
- alter1.q,\
- alter2.q,\
- alter4.q,\
- alter5.q,\
- alter_rename_partition.q,\
- alter_rename_partition_authorization.q,\
- archive.q,\
- archive_corrupt.q,\
- archive_mr_1806.q,\
- archive_multi.q,\
- archive_multi_mr_1806.q,\
- authorization_1.q,\
- authorization_2.q,\
- authorization_4.q,\
- authorization_5.q,\
- authorization_6.q,\
- authorization_7.q,\
- ba_table1.q,\
- ba_table2.q,\
- ba_table3.q,\
- ba_table_udfs.q,\
- binary_table_bincolserde.q,\
- binary_table_colserde.q,\
- cluster.q,\
- columnarserde_create_shortcut.q,\
- combine2.q,\
- constant_prop.q,\
- create_nested_type.q,\
- create_or_replace_view.q,\
- create_struct_table.q,\
- create_union_table.q,\
- database.q,\
- database_location.q,\
- database_properties.q,\
- describe_database_json.q,\
- drop_database_removes_partition_dirs.q,\
- escape1.q,\
- escape2.q,\
- exim_00_nonpart_empty.q,\
- exim_01_nonpart.q,\
- exim_02_00_part_empty.q,\
- exim_02_part.q,\
- exim_03_nonpart_over_compat.q,\
- exim_04_all_part.q,\
- exim_04_evolved_parts.q,\
- exim_05_some_part.q,\
- exim_06_one_part.q,\
- exim_07_all_part_over_nonoverlap.q,\
- exim_08_nonpart_rename.q,\
- exim_09_part_spec_nonoverlap.q,\
- exim_10_external_managed.q,\
- exim_11_managed_external.q,\
- exim_12_external_location.q,\
- exim_13_managed_location.q,\
- exim_14_managed_location_over_existing.q,\
- exim_15_external_part.q,\
- exim_16_part_external.q,\
- exim_17_part_managed.q,\
- exim_18_part_external.q,\
- exim_19_00_part_external_location.q,\
- exim_19_part_external_location.q,\
- exim_20_part_managed_location.q,\
- exim_21_export_authsuccess.q,\
- exim_22_import_exist_authsuccess.q,\
- exim_23_import_part_authsuccess.q,\
- exim_24_import_nonexist_authsuccess.q,\
- global_limit.q,\
- groupby_complex_types.q,\
- groupby_complex_types_multi_single_reducer.q,\
- index_auth.q,\
- index_auto.q,\
- index_auto_empty.q,\
- index_bitmap.q,\
- index_bitmap1.q,\
- index_bitmap2.q,\
- index_bitmap3.q,\
- index_bitmap_auto.q,\
- index_bitmap_rc.q,\
- index_compact.q,\
- index_compact_1.q,\
- index_compact_2.q,\
- index_compact_3.q,\
- index_stale_partitioned.q,\
- init_file.q,\
- input16.q,\
- input16_cc.q,\
- input46.q,\
- input_columnarserde.q,\
- input_dynamicserde.q,\
- input_lazyserde.q,\
- input_testxpath3.q,\
- input_testxpath4.q,\
- insert2_overwrite_partitions.q,\
- insertexternal1.q,\
- join_thrift.q,\
- lateral_view.q,\
- load_binary_data.q,\
- load_exist_part_authsuccess.q,\
- load_nonpart_authsuccess.q,\
- load_part_authsuccess.q,\
- loadpart_err.q,\
- lock1.q,\
- lock2.q,\
- lock3.q,\
- lock4.q,\
- merge_dynamic_partition.q,\
- multi_insert.q,\
- multi_insert_move_tasks_share_dependencies.q,\
- null_column.q,\
- ppd_clusterby.q,\
- query_with_semi.q,\
- rename_column.q,\
- sample6.q,\
- sample_islocalmode_hook.q,\
- set_processor_namespaces.q,\
- show_tables.q,\
- source.q,\
- split_sample.q,\
- str_to_map.q,\
- transform1.q,\
- udaf_collect_set.q,\
- udaf_context_ngrams.q,\
- udaf_histogram_numeric.q,\
- udaf_ngrams.q,\
- udaf_percentile_approx.q,\
- udf_array.q,\
- udf_bitmap_and.q,\
- udf_bitmap_or.q,\
- udf_explode.q,\
- udf_format_number.q,\
- udf_map.q,\
- udf_map_keys.q,\
- udf_map_values.q,\
- udf_mask.q,\
- udf_mask_first_n.q,\
- udf_mask_hash.q,\
- udf_mask_last_n.q,\
- udf_mask_show_first_n.q,\
- udf_mask_show_last_n.q,\
- udf_max.q,\
- udf_min.q,\
- udf_named_struct.q,\
- udf_percentile.q,\
- udf_printf.q,\
- udf_sentences.q,\
- udf_sort_array.q,\
- udf_split.q,\
- udf_struct.q,\
- udf_substr.q,\
- udf_translate.q,\
- udf_union.q,\
- udf_xpath.q,\
- udtf_stack.q,\
- view.q,\
- virtual_column.q
-
-minimr.query.negative.files=cluster_tasklog_retrieval.q,\
- file_with_header_footer_negative.q,\
- local_mapred_error_cache.q,\
- mapreduce_stack_trace.q,\
- mapreduce_stack_trace_hadoop20.q,\
- mapreduce_stack_trace_turnoff.q,\
- mapreduce_stack_trace_turnoff_hadoop20.q,\
- minimr_broken_pipe.q,\
- table_nonprintable_negative.q,\
- udf_local_resource.q
-
- # tests are sorted. To re-sort them, use: perl -pe 's@\\\s*\n@ @g' testconfiguration.properties \
-# | awk -F= '/spark.query.files/{print $2}' | perl -pe 's@.q *, *@\n@g' \
-# | egrep -v '^ *$' | sort -V | uniq | perl -pe 's@\n@.q, \\\n@g' | perl -pe 's@^@ @g'
-spark.query.files=add_part_multiple.q, \
- alter_merge_orc.q, \
- alter_merge_stats_orc.q, \
- annotate_stats_join.q, \
- auto_join0.q, \
- auto_join1.q, \
- auto_join10.q, \
- auto_join11.q, \
- auto_join12.q, \
- auto_join13.q, \
- auto_join14.q, \
- auto_join15.q, \
- auto_join16.q, \
- auto_join17.q, \
- auto_join18.q, \
- auto_join18_multi_distinct.q, \
- auto_join19.q, \
- auto_join2.q, \
- auto_join20.q, \
- auto_join21.q, \
- auto_join22.q, \
- auto_join23.q, \
- auto_join24.q, \
- auto_join26.q, \
- auto_join27.q, \
- auto_join28.q, \
- auto_join29.q, \
- auto_join3.q, \
- auto_join30.q, \
- auto_join31.q, \
- auto_join4.q, \
- auto_join5.q, \
- auto_join6.q, \
- auto_join7.q, \
- auto_join8.q, \
- auto_join9.q, \
- auto_join_filters.q, \
- auto_join_nulls.q, \
- auto_join_reordering_values.q, \
- auto_join_stats.q, \
- auto_join_stats2.q, \
- auto_join_without_localtask.q, \
- auto_smb_mapjoin_14.q, \
- auto_sortmerge_join_1.q, \
- auto_sortmerge_join_10.q, \
- auto_sortmerge_join_12.q, \
- auto_sortmerge_join_13.q, \
- auto_sortmerge_join_14.q, \
- auto_sortmerge_join_15.q, \
- auto_sortmerge_join_16.q, \
- auto_sortmerge_join_3.q, \
- auto_sortmerge_join_4.q, \
- auto_sortmerge_join_5.q, \
- auto_sortmerge_join_6.q, \
- auto_sortmerge_join_7.q, \
- auto_sortmerge_join_8.q, \
- auto_sortmerge_join_9.q, \
- avro_compression_enabled_native.q, \
- avro_decimal_native.q, \
- avro_joins.q, \
- avro_joins_native.q, \
- bucket2.q, \
- bucket3.q, \
- bucket4.q, \
- bucket_map_join_1.q, \
- bucket_map_join_2.q, \
- bucket_map_join_spark1.q, \
- bucket_map_join_spark2.q, \
- bucket_map_join_spark3.q, \
- bucket_map_join_spark4.q, \
- bucket_map_join_tez1.q, \
- bucket_map_join_tez2.q, \
- bucketmapjoin1.q, \
- bucketmapjoin10.q, \
- bucketmapjoin11.q, \
- bucketmapjoin12.q, \
- bucketmapjoin13.q, \
- bucketmapjoin2.q, \
- bucketmapjoin3.q, \
- bucketmapjoin4.q, \
- bucketmapjoin5.q, \
- bucketmapjoin7.q, \
- bucketmapjoin8.q, \
- bucketmapjoin9.q, \
- bucketmapjoin_negative.q, \
- bucketmapjoin_negative2.q, \
- bucketmapjoin_negative3.q, \
- bucketsortoptimize_insert_2.q, \
- bucketsortoptimize_insert_4.q, \
- bucketsortoptimize_insert_6.q, \
- bucketsortoptimize_insert_7.q, \
- bucketsortoptimize_insert_8.q, \
- cbo_gby.q, \
- cbo_gby_empty.q, \
- cbo_limit.q, \
- cbo_semijoin.q, \
- cbo_simple_select.q, \
- cbo_stats.q, \
- cbo_subq_in.q, \
- cbo_subq_not_in.q, \
- cbo_udf_udaf.q, \
- cbo_union.q, \
- column_access_stats.q, \
- count.q, \
- create_merge_compressed.q, \
- cross_join.q, \
- cross_product_check_1.q, \
- cross_product_check_2.q, \
- ctas.q, \
- custom_input_output_format.q, \
- date_join1.q, \
- date_udf.q, \
- decimal_1_1.q, \
- decimal_join.q, \
- disable_merge_for_bucketing.q, \
- dynamic_rdd_cache.q, \
- enforce_order.q, \
- escape_clusterby1.q, \
- escape_distributeby1.q, \
- escape_orderby1.q, \
- escape_sortby1.q, \
- filter_join_breaktask.q, \
- filter_join_breaktask2.q, \
- groupby1.q, \
- groupby10.q, \
- groupby11.q, \
- groupby1_map.q, \
- groupby1_map_nomap.q, \
- groupby1_map_skew.q, \
- groupby1_noskew.q, \
- groupby2.q, \
- groupby2_map.q, \
- groupby2_map_multi_distinct.q, \
- groupby2_map_skew.q, \
- groupby2_noskew.q, \
- groupby2_noskew_multi_distinct.q, \
- groupby3.q, \
- groupby3_map.q, \
- groupby3_map_multi_distinct.q, \
- groupby3_map_skew.q, \
- groupby3_noskew.q, \
- groupby3_noskew_multi_distinct.q, \
- groupby4.q, \
- groupby4_map.q, \
- groupby4_map_skew.q, \
- groupby4_noskew.q, \
- groupby5.q, \
- groupby5_map.q, \
- groupby5_map_skew.q, \
- groupby5_noskew.q, \
- groupby6.q, \
- groupby6_map.q, \
- groupby6_map_skew.q, \
- groupby6_noskew.q, \
- groupby7.q, \
- groupby7_map.q, \
- groupby7_map_multi_single_reducer.q, \
- groupby7_map_skew.q, \
- groupby7_noskew.q, \
- groupby7_noskew_multi_single_reducer.q, \
- groupby8.q, \
- groupby8_map.q, \
- groupby8_map_skew.q, \
- groupby8_noskew.q, \
- groupby9.q, \
- groupby_bigdata.q, \
- groupby_complex_types.q, \
- groupby_complex_types_multi_single_reducer.q, \
- groupby_cube1.q, \
- groupby_grouping_id2.q, \
- groupby_map_ppr.q, \
- groupby_map_ppr_multi_distinct.q, \
- groupby_multi_insert_common_distinct.q, \
- groupby_multi_single_reducer.q, \
- groupby_multi_single_reducer2.q, \
- groupby_multi_single_reducer3.q, \
- groupby_position.q, \
- groupby_ppr.q, \
- groupby_ppr_multi_distinct.q, \
- groupby_resolution.q, \
- groupby_rollup1.q, \
- groupby_sort_1_23.q, \
- groupby_sort_skew_1.q, \
- groupby_sort_skew_1_23.q, \
- qroupby_limit_extrastep.q, \
- having.q, \
- identity_project_remove_skip.q, \
- index_auto_self_join.q, \
- innerjoin.q, \
- input12.q, \
- input13.q, \
- input14.q, \
- input17.q, \
- input18.q, \
- input1_limit.q, \
- input_part2.q, \
- insert_into1.q, \
- insert_into2.q, \
- insert_into3.q, \
- join0.q, \
- join1.q, \
- join10.q, \
- join11.q, \
- join12.q, \
- join13.q, \
- join14.q, \
- join15.q, \
- join16.q, \
- join17.q, \
- join18.q, \
- join18_multi_distinct.q, \
- join19.q, \
- join2.q, \
- join20.q, \
- join21.q, \
- join22.q, \
- join23.q, \
- join24.q, \
- join25.q, \
- join26.q, \
- join27.q, \
- join28.q, \
- join29.q, \
- join3.q, \
- join30.q, \
- join31.q, \
- join32.q, \
- join32_lessSize.q, \
- join33.q, \
- join34.q, \
- join35.q, \
- join36.q, \
- join37.q, \
- join38.q, \
- join39.q, \
- join4.q, \
- join41.q, \
- join5.q, \
- join6.q, \
- join7.q, \
- join8.q, \
- join9.q, \
- join_1to1.q, \
- join_alt_syntax.q, \
- join_array.q, \
- join_casesensitive.q, \
- join_cond_pushdown_1.q, \
- join_cond_pushdown_2.q, \
- join_cond_pushdown_3.q, \
- join_cond_pushdown_4.q, \
- join_cond_pushdown_unqual1.q, \
- join_cond_pushdown_unqual2.q, \
- join_cond_pushdown_unqual3.q, \
- join_cond_pushdown_unqual4.q, \
- join_filters_overlap.q, \
- join_hive_626.q, \
- join_literals.q, \
- join_map_ppr.q, \
- join_merge_multi_expressions.q, \
- join_merging.q, \
- join_nullsafe.q, \
- join_rc.q, \
- join_reorder.q, \
- join_reorder2.q, \
- join_reorder3.q, \
- join_reorder4.q, \
- join_star.q, \
- join_thrift.q, \
- join_vc.q, \
- join_view.q, \
- lateral_view_explode2.q, \
- leftsemijoin.q, \
- leftsemijoin_mr.q, \
- limit_partition_metadataonly.q, \
- limit_pushdown.q, \
- list_bucket_dml_2.q, \
- load_dyn_part1.q, \
- load_dyn_part10.q, \
- load_dyn_part11.q, \
- load_dyn_part12.q, \
- load_dyn_part13.q, \
- load_dyn_part14.q, \
- load_dyn_part15.q, \
- load_dyn_part2.q, \
- load_dyn_part3.q, \
- load_dyn_part4.q, \
- load_dyn_part5.q, \
- load_dyn_part6.q, \
- load_dyn_part7.q, \
- load_dyn_part8.q, \
- load_dyn_part9.q, \
- louter_join_ppr.q, \
- mapjoin1.q, \
- mapjoin_addjar.q, \
- mapjoin_decimal.q, \
- mapjoin_distinct.q, \
- mapjoin_filter_on_outerjoin.q, \
- mapjoin_mapjoin.q, \
- mapjoin_memcheck.q, \
- mapjoin_subquery.q, \
- mapjoin_subquery2.q, \
- mapjoin_test_outer.q, \
- mapreduce1.q, \
- mapreduce2.q, \
- merge1.q, \
- merge2.q, \
- mergejoins.q, \
- mergejoins_mixed.q, \
- metadata_only_queries.q, \
- metadata_only_queries_with_filters.q, \
- multi_insert.q, \
- multi_insert_gby.q, \
- multi_insert_gby2.q, \
- multi_insert_gby3.q, \
- multi_insert_lateral_view.q, \
- multi_insert_mixed.q, \
- multi_insert_move_tasks_share_dependencies.q, \
- multi_insert_with_join.q, \
- multi_join_union.q, \
- multi_join_union_src.q, \
- multigroupby_singlemr.q, \
- nullgroup.q, \
- nullgroup2.q, \
- nullgroup4.q, \
- nullgroup4_multi_distinct.q, \
- optimize_nullscan.q, \
- order.q, \
- order2.q, \
- outer_join_ppr.q, \
- parallel.q, \
- parallel_join0.q, \
- parallel_join1.q, \
- parquet_join.q, \
- pcr.q, \
- ppd_gby_join.q, \
- ppd_join.q, \
- ppd_join2.q, \
- ppd_join3.q, \
- ppd_join5.q, \
- ppd_join_filter.q, \
- ppd_multi_insert.q, \
- ppd_outer_join1.q, \
- ppd_outer_join2.q, \
- ppd_outer_join3.q, \
- ppd_outer_join4.q, \
- ppd_outer_join5.q, \
- ppd_transform.q, \
- ptf.q, \
- ptf_decimal.q, \
- ptf_general_queries.q, \
- ptf_matchpath.q, \
- ptf_rcfile.q, \
- ptf_register_tblfn.q, \
- ptf_seqfile.q, \
- ptf_streaming.q, \
- rcfile_bigdata.q, \
- reduce_deduplicate_exclude_join.q, \
- router_join_ppr.q, \
- runtime_skewjoin_mapjoin_spark.q, \
- sample1.q, \
- sample10.q, \
- sample2.q, \
- sample3.q, \
- sample4.q, \
- sample5.q, \
- sample6.q, \
- sample7.q, \
- sample8.q, \
- sample9.q, \
- script_env_var1.q, \
- script_env_var2.q, \
- script_pipe.q, \
- scriptfile1.q, \
- semijoin.q, \
- skewjoin.q, \
- skewjoin_noskew.q, \
- skewjoin_union_remove_1.q, \
- skewjoin_union_remove_2.q, \
- skewjoinopt1.q, \
- skewjoinopt10.q, \
- skewjoinopt11.q, \
- skewjoinopt12.q, \
- skewjoinopt13.q, \
- skewjoinopt14.q, \
- skewjoinopt15.q, \
- skewjoinopt16.q, \
- skewjoinopt17.q, \
- skewjoinopt18.q, \
- skewjoinopt19.q, \
- skewjoinopt2.q, \
- skewjoinopt20.q, \
- skewjoinopt3.q, \
- skewjoinopt4.q, \
- skewjoinopt5.q, \
- skewjoinopt6.q, \
- skewjoinopt7.q, \
- skewjoinopt8.q, \
- skewjoinopt9.q, \
- smb_mapjoin_1.q, \
- smb_mapjoin_10.q, \
- smb_mapjoin_11.q, \
- smb_mapjoin_12.q, \
- smb_mapjoin_13.q, \
- smb_mapjoin_14.q, \
- smb_mapjoin_15.q, \
- smb_mapjoin_16.q, \
- smb_mapjoin_17.q, \
- smb_mapjoin_18.q, \
- smb_mapjoin_19.q, \
- smb_mapjoin_2.q, \
- smb_mapjoin_20.q, \
- smb_mapjoin_21.q, \
- smb_mapjoin_22.q, \
- smb_mapjoin_25.q, \
- smb_mapjoin_3.q, \
- smb_mapjoin_4.q, \
- smb_mapjoin_5.q, \
- smb_mapjoin_6.q, \
- smb_mapjoin_7.q, \
- smb_mapjoin_8.q, \
- smb_mapjoin_9.q, \
- sort.q, \
- stats0.q, \
- stats1.q, \
- stats10.q, \
- stats12.q, \
- stats13.q, \
- stats14.q, \
- stats15.q, \
- stats16.q, \
- stats18.q, \
- stats2.q, \
- stats3.q, \
- stats5.q, \
- stats6.q, \
- stats7.q, \
- stats8.q, \
- stats9.q, \
- stats_noscan_1.q, \
- stats_noscan_2.q, \
- stats_only_null.q, \
- stats_partscan_1_23.q, \
- statsfs.q, \
- subquery_exists.q, \
- subquery_in.q, \
- subquery_multiinsert.q, \
- table_access_keys_stats.q, \
- temp_table.q, \
- temp_table_gb1.q, \
- temp_table_join1.q, \
- tez_join_tests.q, \
- tez_joins_explain.q, \
- timestamp_1.q, \
- timestamp_2.q, \
- timestamp_3.q, \
- timestamp_comparison.q, \
- timestamp_lazy.q, \
- timestamp_null.q, \
- timestamp_udf.q, \
- transform2.q, \
- transform_ppr1.q, \
- transform_ppr2.q, \
- udaf_collect_set.q, \
- udf_example_add.q, \
- udf_in_file.q, \
- udf_max.q, \
- udf_min.q, \
- udf_percentile.q, \
- union.q, \
- union10.q, \
- union11.q, \
- union12.q, \
- union13.q, \
- union14.q, \
- union15.q, \
- union16.q, \
- union17.q, \
- union18.q, \
- union19.q, \
- union2.q, \
- union20.q, \
- union21.q, \
- union22.q, \
- union23.q, \
- union24.q, \
- union25.q, \
- union26.q, \
- union27.q, \
- union28.q, \
- union29.q, \
- union3.q, \
- union30.q, \
- union31.q, \
- union32.q, \
- union33.q, \
- union34.q, \
- union4.q, \
- union5.q, \
- union6.q, \
- union7.q, \
- union8.q, \
- union9.q, \
- union_date.q, \
- union_date_trim.q, \
- union_lateralview.q, \
- union_null.q, \
- union_ppr.q, \
- union_remove_1.q, \
- union_remove_10.q, \
- union_remove_11.q, \
- union_remove_12.q, \
- union_remove_13.q, \
- union_remove_14.q, \
- union_remove_15.q, \
- union_remove_16.q, \
- union_remove_17.q, \
- union_remove_18.q, \
- union_remove_19.q, \
- union_remove_2.q, \
- union_remove_20.q, \
- union_remove_21.q, \
- union_remove_22.q, \
- union_remove_23.q, \
- union_remove_24.q, \
- union_remove_25.q, \
- union_remove_3.q, \
- union_remove_4.q, \
- union_remove_5.q, \
- union_remove_6.q, \
- union_remove_6_subq.q, \
- union_remove_7.q, \
- union_remove_8.q, \
- union_remove_9.q, \
- union_script.q, \
- union_top_level.q, \
- union_view.q, \
- uniquejoin.q, \
- varchar_join1.q, \
- vector_between_in.q, \
- vector_cast_constant.q, \
- vector_char_4.q, \
- vector_count_distinct.q, \
- vector_data_types.q, \
- vector_decimal_aggregate.q, \
- vector_decimal_mapjoin.q, \
- vector_distinct_2.q, \
- vector_elt.q, \
- vector_groupby_3.q, \
- vector_left_outer_join.q, \
- vector_mapjoin_reduce.q, \
- vector_orderby_5.q, \
- vector_string_concat.q, \
- vector_varchar_4.q, \
- vectorization_0.q, \
- vectorization_1.q, \
- vectorization_10.q, \
- vectorization_11.q, \
- vectorization_12.q, \
- vectorization_13.q, \
- vectorization_14.q, \
- vectorization_15.q, \
- vectorization_16.q, \
- vectorization_17.q, \
- vectorization_2.q, \
- vectorization_3.q, \
- vectorization_4.q, \
- vectorization_5.q, \
- vectorization_6.q, \
- vectorization_9.q, \
- vectorization_decimal_date.q, \
- vectorization_div0.q, \
- vectorization_nested_udf.q, \
- vectorization_not.q, \
- vectorization_part.q, \
- vectorization_part_project.q, \
- vectorization_pushdown.q, \
- vectorization_short_regress.q, \
- vectorized_case.q, \
- vectorized_mapjoin.q, \
- vectorized_math_funcs.q, \
- vectorized_nested_mapjoin.q, \
- vectorized_ptf.q, \
- vectorized_rcfile_columnar.q, \
- vectorized_shufflejoin.q, \
- vectorized_string_funcs.q, \
- vectorized_timestamp_funcs.q, \
- windowing.q
-
-# Unlike "spark.query.files" above, these tests only run
-# under Spark engine.
-spark.only.query.files=spark_dynamic_partition_pruning.q,\
- spark_dynamic_partition_pruning_2.q,\
- spark_vectorized_dynamic_partition_pruning.q
-
-miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
- bucket4.q,\
- bucket5.q,\
- bucket6.q,\
- bucketizedhiveinputformat.q,\
- bucketmapjoin6.q,\
- bucketmapjoin7.q,\
- constprog_partitioner.q,\
- constprog_semijoin.q,\
- disable_merge_for_bucketing.q,\
- empty_dir_in_table.q,\
- external_table_with_space_in_location_path.q,\
- file_with_header_footer.q,\
- gen_udf_example_add10.q,\
- import_exported_table.q,\
- index_bitmap3.q,\
- index_bitmap_auto.q,\
- infer_bucket_sort_bucketed_table.q,\
- infer_bucket_sort_map_operators.q,\
- infer_bucket_sort_merge.q,\
- infer_bucket_sort_num_buckets.q,\
- infer_bucket_sort_reducers_power_two.q,\
- input16_cc.q,\
- insert_overwrite_directory2.q,\
- leftsemijoin_mr.q,\
- list_bucket_dml_10.q,\
- load_fs2.q,\
- load_hdfs_file_with_space_in_the_name.q,\
- orc_merge1.q,\
- orc_merge2.q,\
- orc_merge3.q,\
- orc_merge4.q,\
- orc_merge5.q,\
- orc_merge6.q,\
- orc_merge7.q,\
- orc_merge8.q,\
- orc_merge9.q,\
- orc_merge_diff_fs.q,\
- orc_merge_incompat1.q,\
- orc_merge_incompat2.q,\
- parallel_orderby.q,\
- quotedid_smb.q,\
- reduce_deduplicate.q,\
- remote_script.q,\
- root_dir_external_table.q,\
- schemeAuthority.q,\
- schemeAuthority2.q,\
- scriptfile1.q,\
- scriptfile1_win.q,\
- temp_table_external.q,\
- truncate_column_buckets.q,\
- uber_reduce.q,\
- vector_inner_join.q,\
- vector_outer_join0.q,\
- vector_outer_join1.q,\
- vector_outer_join2.q,\
- vector_outer_join3.q,\
- vector_outer_join4.q,\
- vector_outer_join5.q
-
-# These tests are removed from miniSparkOnYarn.query.files
-# ql_rewrite_gbtoidx.q,\
-# ql_rewrite_gbtoidx_cbo_1.q,\
-# smb_mapjoin_8.q,\
-
-
-spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
- groupby2_multi_distinct.q,\
- groupby3_map_skew_multi_distinct.q,\
- groupby3_multi_distinct.q,\
- groupby_grouping_sets7.q
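A note on the property syntax in the file above: java.util.Properties treats a
trailing backslash as a line continuation, so each *.query.files entry loads as a
single comma-separated string of .q test names. A minimal, self-contained sketch of
that behavior (the parsing code here is illustrative only, not how Hive's test
driver actually reads the file):

    import java.io.StringReader;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Properties;

    public class QueryFilesDemo {
      public static void main(String[] args) throws Exception {
        // A trailing backslash is a standard Properties line continuation,
        // so the three .q files below load as one comma-separated value.
        String snippet =
            "spark.only.query.files=spark_dynamic_partition_pruning.q,\\\n"
          + "  spark_dynamic_partition_pruning_2.q,\\\n"
          + "  spark_vectorized_dynamic_partition_pruning.q\n";
        Properties props = new Properties();
        props.load(new StringReader(snippet));
        List<String> tests = Arrays.asList(
            props.getProperty("spark.only.query.files").split("\\s*,\\s*"));
        System.out.println(tests);  // the three .q names, as a list
      }
    }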
http://git-wip-us.apache.org/repos/asf/hive/blob/bc75e46a/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig
deleted file mode 100644
index 4667f68..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig
+++ /dev/null
@@ -1,829 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql;
-
-import java.io.DataInput;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.net.URI;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.antlr.runtime.TokenRewriteStream;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.ContentSummary;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.BlobStorageUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.TaskRunner;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager.Heartbeater;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
-import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
-import org.apache.hadoop.hive.ql.lockmgr.LockException;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.util.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Context for Semantic Analyzers. Usage: not reusable - construct a new one for
- * each query; call clear() at the end of use to remove temporary folders.
- */
-public class Context {
- private boolean isHDFSCleanup;
- private Path resFile;
- private Path resDir;
- private FileSystem resFs;
- private static final Logger LOG = LoggerFactory.getLogger("hive.ql.Context");
- private Path[] resDirPaths;
- private int resDirFilesNum;
- boolean initialized;
- String originalTracker = null;
- private final CompilationOpContext opContext;
- private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
-
- // scratch path to use for all non-local (i.e. HDFS) file system tmp folders
- private final Path nonLocalScratchPath;
-
- // scratch directory to use for local file system tmp folders
- private final String localScratchDir;
-
- // the permission for the scratch directories (local and HDFS)
- private final String scratchDirPermission;
-
- // Keeps track of scratch directories created for different scheme/authority
- private final Map<String, Path> fsScratchDirs = new HashMap<String, Path>();
-
- private final Configuration conf;
- protected int pathid = 10000;
- protected boolean explain = false;
- protected String cboInfo;
- protected boolean cboSucceeded;
- protected boolean explainLogical = false;
- protected String cmd = "";
- // number of previous attempts
- protected int tryCount = 0;
- private TokenRewriteStream tokenRewriteStream;
-
- private String executionId;
-
- // List of Locks for this query
- protected List<HiveLock> hiveLocks;
-
- // Transaction manager for this query
- protected HiveTxnManager hiveTxnManager;
-
- // Used to track what type of ACID operation (insert, update, or delete) we are doing.
- // Useful since we want to change where bucket columns are accessed in some operators,
- // and to adjust optimizations, when doing updates and deletes.
- private AcidUtils.Operation acidOperation = AcidUtils.Operation.NOT_ACID;
-
- private boolean needLockMgr;
-
- private AtomicInteger sequencer = new AtomicInteger();
-
- private final Map<String, Table> cteTables = new HashMap<String, Table>();
-
- // Keep track of the mapping from load table desc to the output and the lock
- private final Map<LoadTableDesc, WriteEntity> loadTableOutputMap =
- new HashMap<LoadTableDesc, WriteEntity>();
- private final Map<WriteEntity, List<HiveLockObj>> outputLockObjects =
- new HashMap<WriteEntity, List<HiveLockObj>>();
-
- private final String stagingDir;
-
- private Heartbeater heartbeater;
-
- private boolean skipTableMasking;
-
- public Context(Configuration conf) throws IOException {
- this(conf, generateExecutionId());
- }
-
- /**
- * Create a Context with a given executionId. ExecutionId, together with
- * user name and conf, will determine the temporary directory locations.
- */
- public Context(Configuration conf, String executionId) {
- this.conf = conf;
- this.executionId = executionId;
-
- // The local & non-local tmp locations are configurable; however, they are the same
- // across all external file systems.
- nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId);
- localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath();
- scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
- stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
- opContext = new CompilationOpContext();
- }
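Per the class javadoc, a Context is built per query and must be cleared when the
query finishes. A minimal usage sketch, assuming the Hive ql/conf jars are on the
classpath and that a session can be started locally (the query-execution step
itself is elided):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Context;
    import org.apache.hadoop.hive.ql.session.SessionState;

    public class ContextLifecycleDemo {
      public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        SessionState.start(conf);         // session paths back the scratch dirs
        Context ctx = new Context(conf);  // one Context per query, never reused
        try {
          // ... compile and run a single query with ctx ...
        } finally {
          ctx.clear();                    // removes scratch dirs and CTE data
        }
      }
    }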
-
-
- public Map<LoadTableDesc, WriteEntity> getLoadTableOutputMap() {
- return loadTableOutputMap;
- }
-
- public Map<WriteEntity, List<HiveLockObj>> getOutputLockObjects() {
- return outputLockObjects;
- }
-
- /**
- * Set the context on whether the current query is an explain query.
- * @param value true if the query is an explain query, false if not
- */
- public void setExplain(boolean value) {
- explain = value;
- }
-
- /**
- * Find whether the current query is an explain query
- * @return true if the query is an explain query, false if not
- */
- public boolean getExplain() {
- return explain;
- }
-
- /**
- * Find whether the current query is a logical explain query
- */
- public boolean getExplainLogical() {
- return explainLogical;
- }
-
- /**
- * Set the context on whether the current query is a logical
- * explain query.
- */
- public void setExplainLogical(boolean explainLogical) {
- this.explainLogical = explainLogical;
- }
-
- /**
- * Set the original query command.
- * @param cmd the original query command string
- */
- public void setCmd(String cmd) {
- this.cmd = cmd;
- }
-
- /**
- * Find the original query command.
- * @return the original query command string
- */
- public String getCmd () {
- return cmd;
- }
-
- /**
- * Gets a temporary staging directory related to a path.
- * If the path already contains a staging directory component, the existing prefix is
- * reused; otherwise a staging directory is appended to the path and, if requested,
- * created on the filesystem.
- *
- * @param inputPath path the staging directory is derived from
- * @param mkdir create the directory on the filesystem if true
- * @return a temporary staging path
- */
- private Path getStagingDir(Path inputPath, boolean mkdir) {
- final URI inputPathUri = inputPath.toUri();
- final String inputPathName = inputPathUri.getPath();
- final String fileSystem = inputPathUri.getScheme() + ":" + inputPathUri.getAuthority();
- final FileSystem fs;
-
- try {
- fs = inputPath.getFileSystem(conf);
- } catch (IOException e) {
- throw new IllegalStateException("Error getting FileSystem for " + inputPath + ": "+ e, e);
- }
-
- String stagingPathName;
- if (inputPathName.indexOf(stagingDir) == -1) {
- stagingPathName = new Path(inputPathName, stagingDir).toString();
- } else {
- stagingPathName = inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length());
- }
-
- final String key = fileSystem + "-" + stagingPathName + "-" + TaskRunner.getTaskRunnerID();
-
- Path dir = fsScratchDirs.get(key);
- if (dir == null) {
- // Append task specific info to stagingPathName, instead of creating a sub-directory.
- // This way we don't have to worry about deleting the stagingPathName separately at
- // end of query execution.
- dir = fs.makeQualified(new Path(stagingPathName + "_" + this.executionId + "-" + TaskRunner.getTaskRunnerID()));
-
- LOG.debug("Created staging dir = " + dir + " for path = " + inputPath);
-
- if (mkdir) {
- try {
- boolean inheritPerms = HiveConf.getBoolVar(conf,
- HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
- if (!FileUtils.mkdir(fs, dir, inheritPerms, conf)) {
- throw new IllegalStateException("Cannot create staging directory '" + dir.toString() + "'");
- }
-
- if (isHDFSCleanup) {
- fs.deleteOnExit(dir);
- }
- } catch (IOException e) {
- throw new RuntimeException("Cannot create staging directory '" + dir.toString() + "': " + e.getMessage(), e);
- }
- }
-
- fsScratchDirs.put(key, dir);
- }
-
- return dir;
- }
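Stripped of the FileSystem plumbing, the staging-path choice above is a substring
check: reuse everything up to and including the first occurrence of the staging
dir name, otherwise append it. A standalone illustration of just that string logic
(".hive-staging" is the usual hive.exec.stagingdir default; the paths are made up):

    public class StagingPathDemo {
      static String stagingPathName(String inputPathName, String stagingDir) {
        int idx = inputPathName.indexOf(stagingDir);
        if (idx == -1) {
          return inputPathName + "/" + stagingDir;  // no staging dir yet: append
        }
        return inputPathName.substring(0, idx + stagingDir.length());  // reuse prefix
      }

      public static void main(String[] args) {
        String staging = ".hive-staging";
        System.out.println(stagingPathName("/user/hive/warehouse/t", staging));
        // -> /user/hive/warehouse/t/.hive-staging
        System.out.println(stagingPathName(
            "/user/hive/warehouse/t/.hive-staging_x/part", staging));
        // -> /user/hive/warehouse/t/.hive-staging
      }
    }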
-
- /**
- * Get a tmp directory on the specified URI.
- *
- * @param scheme Scheme of the target FS
- * @param authority Authority of the target FS
- * @param mkdir create the directory if true
- * @param scratchDir path of tmp directory
- */
- private Path getScratchDir(String scheme, String authority,
- boolean mkdir, String scratchDir) {
-
- String fileSystem = scheme + ":" + authority;
- Path dir = fsScratchDirs.get(fileSystem + "-" + TaskRunner.getTaskRunnerID());
-
- if (dir == null) {
- Path dirPath = new Path(scheme, authority,
- scratchDir + "-" + TaskRunner.getTaskRunnerID());
- if (mkdir) {
- try {
- FileSystem fs = dirPath.getFileSystem(conf);
- dirPath = new Path(fs.makeQualified(dirPath).toString());
- FsPermission fsPermission = new FsPermission(scratchDirPermission);
-
- if (!fs.mkdirs(dirPath, fsPermission)) {
- throw new RuntimeException("Cannot make directory: "
- + dirPath.toString());
- }
- if (isHDFSCleanup) {
- fs.deleteOnExit(dirPath);
- }
- } catch (IOException e) {
- throw new RuntimeException (e);
- }
- }
- dir = dirPath;
- fsScratchDirs.put(fileSystem + "-" + TaskRunner.getTaskRunnerID(), dir);
-
- }
-
- return dir;
- }
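getScratchDir() memoizes one directory per filesystem and task runner, keyed by
"scheme:authority-<taskRunnerId>". The same create-once-then-reuse pattern in
miniature (pure JDK; the directory value is a stand-in, not Hive's real layout):

    import java.util.HashMap;
    import java.util.Map;

    public class ScratchDirCacheDemo {
      private final Map<String, String> fsScratchDirs = new HashMap<>();

      String getScratchDir(String scheme, String authority, long runnerId) {
        String key = scheme + ":" + authority + "-" + runnerId;
        // Create (and remember) a directory only on the first request per key.
        return fsScratchDirs.computeIfAbsent(key, k -> "/tmp/scratch/" + k.hashCode());
      }

      public static void main(String[] args) {
        ScratchDirCacheDemo cache = new ScratchDirCacheDemo();
        String first = cache.getScratchDir("hdfs", "nn1:8020", 1L);
        String second = cache.getScratchDir("hdfs", "nn1:8020", 1L);
        System.out.println(first.equals(second));  // true: reused, not recreated
      }
    }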
-
-
- /**
- * Create a local scratch directory on demand and return it.
- */
- public Path getLocalScratchDir(boolean mkdir) {
- try {
- FileSystem fs = FileSystem.getLocal(conf);
- URI uri = fs.getUri();
- return getScratchDir(uri.getScheme(), uri.getAuthority(),
- mkdir, localScratchDir);
- } catch (IOException e) {
- throw new RuntimeException (e);
- }
- }
-
-
- /**
- * Create a map-reduce scratch directory on demand and return it.
- *
- */
- public Path getMRScratchDir() {
-
- // if we are executing entirely on the client side - then
- // just (re)use the local scratch directory
- if(isLocalOnlyExecutionMode()) {
- return getLocalScratchDir(!explain);
- }
-
- try {
- Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
- URI uri = dir.toUri();
-
- Path newScratchDir = getScratchDir(uri.getScheme(), uri.getAuthority(),
- !explain, uri.getPath());
- LOG.info("New scratch dir is " + newScratchDir);
- return newScratchDir;
- } catch (IOException e) {
- throw new RuntimeException(e);
- } catch (IllegalArgumentException e) {
- throw new RuntimeException("Error while making MR scratch "
- + "directory - check filesystem config (" + e.getCause() + ")", e);
- }
- }
-
- /**
- * Create a temporary directory depending on the path specified.
- * - If the path is on an object store filesystem, then use the default MR scratch directory (HDFS)
- * - If the path is on HDFS, then create a staging directory inside the path
- *
- * @param path Path used to verify the Filesystem to use for temporary directory
- * @return A path to the new temporary directory
- */
- public Path getTempDirForPath(Path path) {
- boolean isLocal = isPathLocal(path);
- if ((BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir(conf))
- || isLocal) {
- // For better write performance, we use HDFS for temporary data when an object store is used.
- // Note that the scratch directory configuration variable must point to HDFS or another
- // non-blobstorage system to get this benefit.
- return getMRTmpPath();
- } else {
- return getExtTmpPathRelTo(path);
- }
- }
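The branch in getTempDirForPath() condenses to a single predicate: temp data goes
to the HDFS MR scratch dir when the destination is on a blob store (with blob
scratch dirs disabled) or is a local path; otherwise it is staged next to the
destination. A self-contained restatement of that predicate (the method and flag
names here are made up for illustration):

    public class TempDirChoiceDemo {
      // true -> use the HDFS MR scratch dir; false -> stage next to the path.
      static boolean useMrScratchDir(boolean isBlobPath, boolean blobUsableAsScratch,
                                     boolean isLocalPath) {
        return (isBlobPath && !blobUsableAsScratch) || isLocalPath;
      }

      public static void main(String[] args) {
        // S3 table with blob scratch dirs disabled: stage on HDFS for faster writes.
        System.out.println(useMrScratchDir(true, false, false));   // true
        // Ordinary HDFS table path: stage inside the destination instead.
        System.out.println(useMrScratchDir(false, false, false));  // false
      }
    }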
-
- /*
- * Checks if the path is for the local filesystem or not
- */
- private boolean isPathLocal(Path path) {
- boolean isLocal = false;
- if (path != null) {
- String scheme = path.toUri().getScheme();
- if (scheme != null) {
- isLocal = scheme.equals(Utilities.HADOOP_LOCAL_FS_SCHEME);
- }
- }
- return isLocal;
- }
-
- private Path getExternalScratchDir(URI extURI) {
- return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !explain);
- }
-
- /**
- * Remove any created scratch directories.
- */
- public void removeScratchDir() {
- for (Map.Entry<String, Path> entry : fsScratchDirs.entrySet()) {
- try {
- Path p = entry.getValue();
- FileSystem fs = p.getFileSystem(conf);
- fs.delete(p, true);
- fs.cancelDeleteOnExit(p);
- } catch (Exception e) {
- LOG.warn("Error Removing Scratch: "
- + StringUtils.stringifyException(e));
- }
- }
- fsScratchDirs.clear();
- }
-
- /**
- * Remove any created directories for CTEs.
- */
- public void removeMaterializedCTEs() {
- // clean CTE tables
- for (Table materializedTable : cteTables.values()) {
- Path location = materializedTable.getDataLocation();
- try {
- FileSystem fs = location.getFileSystem(conf);
- boolean status = fs.delete(location, true);
- LOG.info("Removed " + location + " for materialized "
- + materializedTable.getTableName() + ", status=" + status);
- } catch (IOException e) {
- // ignore
- LOG.warn("Error removing " + location + " for materialized " + materializedTable.getTableName() +
- ": " + StringUtils.stringifyException(e));
- }
- }
- cteTables.clear();
- }
-
- private String nextPathId() {
- return Integer.toString(pathid++);
- }
-
-
- private static final String MR_PREFIX = "-mr-";
- private static final String EXT_PREFIX = "-ext-";
- private static final String LOCAL_PREFIX = "-local-";
-
- /**
- * Check if a path is for intermediate data.
- * @return true if the URI is a temporary URI for map-reduce intermediate data,
- * false otherwise
- */
- public boolean isMRTmpFileURI(String uriStr) {
- return (uriStr.indexOf(executionId) != -1) &&
- (uriStr.indexOf(MR_PREFIX) != -1);
- }
-
- public Path getMRTmpPath(URI uri) {
- return new Path(getStagingDir(new Path(uri), !explain), MR_PREFIX + nextPathId());
- }
-
- /**
- * Get a path to store map-reduce intermediate data in.
- *
- * @return next available path for map-red intermediate data
- */
- public Path getMRTmpPath() {
- return new Path(getMRScratchDir(), MR_PREFIX +
- nextPathId());
- }
-
- /**
- * Get a tmp path on the local host to store intermediate data.
- *
- * @return next available tmp path on local fs
- */
- public Path getLocalTmpPath() {
- return new Path(getLocalScratchDir(true), LOCAL_PREFIX + nextPathId());
- }
-
- /**
- * Get a path to store tmp data destined for external Path.
- *
- * @param path external Path to which the tmp data has to be eventually moved
- * @return next available tmp path on the file system corresponding to extURI
- */
- public Path getExternalTmpPath(Path path) {
- URI extURI = path.toUri();
- if (extURI.getScheme().equals("viewfs")) {
- // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
- // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
- // on same namespace as tbl dir.
- return getExtTmpPathRelTo(path.getParent());
- }
- return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
- nextPathId());
- }
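The viewfs special case above exists because a rename from a /tmp mount to the
table's mount can cross namespaces and fail, so the tmp dir must live under the
destination's parent. A tiny null-safe restatement of just the scheme check (the
method name is hypothetical):

    import java.net.URI;

    public class ViewfsTmpDemo {
      // On viewfs, keep the tmp dir in the destination's own namespace.
      static boolean mustStayInSameNamespace(URI dest) {
        return "viewfs".equals(dest.getScheme());
      }

      public static void main(String[] args) {
        System.out.println(mustStayInSameNamespace(
            URI.create("viewfs://cluster/warehouse/t")));  // true
        System.out.println(mustStayInSameNamespace(
            URI.create("hdfs://nn1/warehouse/t")));        // false
      }
    }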
-
- /**
- * This is similar to getExternalTmpPath(), the difference being that this method returns a
- * temp path within the passed-in path, whereas getExternalTmpPath() ignores the passed-in
- * path and returns a temp path within /tmp.
- */
- public Path getExtTmpPathRelTo(Path path) {
- return new Path(getStagingDir(path, !explain), EXT_PREFIX + nextPathId());
- }
-
- /**
- * @return the resFile
- */
- public Path getResFile() {
- return resFile;
- }
-
- /**
- * @param resFile
- * the resFile to set
- */
- public void setResFile(Path resFile) {
- this.resFile = resFile;
- resDir = null;
- resDirPaths = null;
- resDirFilesNum = 0;
- }
-
- /**
- * @return the resDir
- */
- public Path getResDir() {
- return resDir;
- }
-
- /**
- * @param resDir
- * the resDir to set
- */
- public void setResDir(Path resDir) {
- this.resDir = resDir;
- resFile = null;
-
- resDirFilesNum = 0;
- resDirPaths = null;
- }
-
- public void clear() throws IOException {
- if (resDir != null) {
- try {
- FileSystem fs = resDir.getFileSystem(conf);
- fs.delete(resDir, true);
- } catch (IOException e) {
- LOG.info("Context clear error: " + StringUtils.stringifyException(e));
- }
- }
-
- if (resFile != null) {
- try {
- FileSystem fs = resFile.getFileSystem(conf);
- fs.delete(resFile, false);
- } catch (IOException e) {
- LOG.info("Context clear error: " + StringUtils.stringifyException(e));
- }
- }
- removeMaterializedCTEs();
- removeScratchDir();
- originalTracker = null;
- setNeedLockMgr(false);
- }
-
- public DataInput getStream() {
- try {
- if (!initialized) {
- initialized = true;
- if ((resFile == null) && (resDir == null)) {
- return null;
- }
-
- if (resFile != null) {
- return resFile.getFileSystem(conf).open(resFile);
- }
-
- resFs = resDir.getFileSystem(conf);
- FileStatus status = resFs.getFileStatus(resDir);
- assert status.isDir();
- FileStatus[] resDirFS = resFs.globStatus(new Path(resDir + "/*"), FileUtils.HIDDEN_FILES_PATH_FILTER);
- resDirPaths = new Path[resDirFS.length];
- int pos = 0;
- for (FileStatus resFS : resDirFS) {
- if (!resFS.isDir()) {
- resDirPaths[pos++] = resFS.getPath();
- }
- }
- if (pos == 0) {
- return null;
- }
-
- return resFs.open(resDirPaths[resDirFilesNum++]);
- } else {
- return getNextStream();
- }
- } catch (FileNotFoundException e) {
- LOG.info("getStream error: " + StringUtils.stringifyException(e));
- return null;
- } catch (IOException e) {
- LOG.info("getStream error: " + StringUtils.stringifyException(e));
- return null;
- }
- }
-
- private DataInput getNextStream() {
- try {
- if (resDir != null && resDirFilesNum < resDirPaths.length
- && (resDirPaths[resDirFilesNum] != null)) {
- return resFs.open(resDirPaths[resDirFilesNum++]);
- }
- } catch (FileNotFoundException e) {
- LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
- return null;
- } catch (IOException e) {
- LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
- return null;
- }
-
- return null;
- }
-
- public void resetStream() {
- if (initialized) {
- resDirFilesNum = 0;
- initialized = false;
- }
- }
-
- /**
- * Little abbreviation for StringUtils.
- */
- private static boolean strEquals(String str1, String str2) {
- return org.apache.commons.lang.StringUtils.equals(str1, str2);
- }
-
- /**
- * Set the token rewrite stream being used to parse the current top-level SQL
- * statement. Note that this should <b>not</b> be used for other parsing
- * activities; for example, when we encounter a reference to a view, we switch
- * to a new stream for parsing the stored view definition from the catalog,
- * but we don't clobber the top-level stream in the context.
- *
- * @param tokenRewriteStream
- * the stream being used
- */
- public void setTokenRewriteStream(TokenRewriteStream tokenRewriteStream) {
- assert (this.tokenRewriteStream == null);
- this.tokenRewriteStream = tokenRewriteStream;
- }
-
- /**
- * @return the token rewrite stream being used to parse the current top-level
- * SQL statement, or null if it isn't available (e.g. for parser
- * tests)
- */
- public TokenRewriteStream getTokenRewriteStream() {
- return tokenRewriteStream;
- }
-
- /**
- * Generate a unique executionId. An executionId, together with user name and
- * the configuration, will determine the temporary locations of all intermediate
- * files.
- *
- * In the future, users can use the executionId to resume a query.
- */
- public static String generateExecutionId() {
- Random rand = new Random();
- SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS");
- String executionId = "hive_" + format.format(new Date()) + "_"
- + Math.abs(rand.nextLong());
- return executionId;
- }
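The generated IDs therefore look like hive_<timestamp>_<positive random long>. A
standalone reproduction of the same format (the output shown is illustrative):

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Random;

    public class ExecutionIdDemo {
      public static void main(String[] args) {
        // Same format as generateExecutionId() above. (Pedantic aside:
        // Math.abs(Long.MIN_VALUE) is still negative, though the odds are 2^-64.)
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS");
        String executionId = "hive_" + format.format(new Date()) + "_"
            + Math.abs(new Random().nextLong());
        System.out.println(executionId);
        // e.g. hive_2016-09-10_06-33-27_123_8942163550171848389
      }
    }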
-
- /**
- * Does Hive want to run tasks entirely on the local machine
- * (where the query is being compiled)?
- *
- * Today this translates into running Hadoop jobs locally.
- */
- public boolean isLocalOnlyExecutionMode() {
- // Always allow Spark to run in cluster mode. Without this, depending on the
- // user's local hadoop settings, true may be returned, which would cause the plan
- // to be stored in a local path.
- if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
- return false;
- }
-
- return ShimLoader.getHadoopShims().isLocalMode(conf);
- }
-
- public List<HiveLock> getHiveLocks() {
- return hiveLocks;
- }
-
- public void setHiveLocks(List<HiveLock> hiveLocks) {
- this.hiveLocks = hiveLocks;
- }
-
- public HiveTxnManager getHiveTxnManager() {
- return hiveTxnManager;
- }
-
- public void setHiveTxnManager(HiveTxnManager txnMgr) {
- hiveTxnManager = txnMgr;
- }
-
- public void setOriginalTracker(String originalTracker) {
- this.originalTracker = originalTracker;
- }
-
- public void restoreOriginalTracker() {
- if (originalTracker != null) {
- ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, originalTracker);
- originalTracker = null;
- }
- }
-
- public void addCS(String path, ContentSummary cs) {
- pathToCS.put(path, cs);
- }
-
- public ContentSummary getCS(Path path) {
- return getCS(path.toString());
- }
-
- public ContentSummary getCS(String path) {
- return pathToCS.get(path);
- }
-
- public Map<String, ContentSummary> getPathToCS() {
- return pathToCS;
- }
-
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * @return the isHDFSCleanup
- */
- public boolean isHDFSCleanup() {
- return isHDFSCleanup;
- }
-
- /**
- * @param isHDFSCleanup the isHDFSCleanup to set
- */
- public void setHDFSCleanup(boolean isHDFSCleanup) {
- this.isHDFSCleanup = isHDFSCleanup;
- }
-
- public boolean isNeedLockMgr() {
- return needLockMgr;
- }
-
- public void setNeedLockMgr(boolean needLockMgr) {
- this.needLockMgr = needLockMgr;
- }
-
- public int getTryCount() {
- return tryCount;
- }
-
- public void setTryCount(int tryCount) {
- this.tryCount = tryCount;
- }
-
- public void setAcidOperation(AcidUtils.Operation op) {
- acidOperation = op;
- }
-
- public AcidUtils.Operation getAcidOperation() {
- return acidOperation;
- }
-
- public String getCboInfo() {
- return cboInfo;
- }
-
- public void setCboInfo(String cboInfo) {
- this.cboInfo = cboInfo;
- }
-
- public boolean isCboSucceeded() {
- return cboSucceeded;
- }
-
- public void setCboSucceeded(boolean cboSucceeded) {
- this.cboSucceeded = cboSucceeded;
- }
-
- public Table getMaterializedTable(String cteName) {
- return cteTables.get(cteName);
- }
-
- public void addMaterializedTable(String cteName, Table table) {
- cteTables.put(cteName, table);
- }
-
- public AtomicInteger getSequencer() {
- return sequencer;
- }
-
- public CompilationOpContext getOpContext() {
- return opContext;
- }
-
- public Heartbeater getHeartbeater() {
- return heartbeater;
- }
-
- public void setHeartbeater(Heartbeater heartbeater) {
- this.heartbeater = heartbeater;
- }
-
- public void checkHeartbeaterLockException() throws LockException {
- if (getHeartbeater() != null && getHeartbeater().getLockException() != null) {
- throw getHeartbeater().getLockException();
- }
- }
-
- public boolean isSkipTableMasking() {
- return skipTableMasking;
- }
-
- public void setSkipTableMasking(boolean skipTableMasking) {
- this.skipTableMasking = skipTableMasking;
- }
-}