You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/09/10 09:44:59 UTC
svn commit: r1623931 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql:
QueryProperties.java parse/ColumnStatsSemanticAnalyzer.java
parse/GlobalLimitCtx.java parse/SemanticAnalyzer.java
parse/SemanticAnalyzerFactory.java parse/UnparseTranslator.java
Author: hashutosh
Date: Wed Sep 10 07:44:58 2014
New Revision: 1623931
URL: http://svn.apache.org/r1623931
Log:
HIVE-6550 : SemanticAnalyzer.reset() doesn't clear all the state (Sergey Shelukhin via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java?rev=1623931&r1=1623930&r2=1623931&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java Wed Sep 10 07:44:58 2014
@@ -143,4 +143,22 @@ public class QueryProperties {
public void setHasMapGroupBy(boolean hasMapGroupBy) {
this.hasMapGroupBy = hasMapGroupBy;
}
+
+ public void clear() {
+ hasJoin = false;
+ hasGroupBy = false;
+ hasOrderBy = false;
+ hasSortBy = false;
+ hasJoinFollowedByGroupBy = false;
+ hasPTF = false;
+ hasWindowing = false;
+
+ // does the query have a using clause
+ usesScript = false;
+
+ hasDistributeBy = false;
+ hasClusterBy = false;
+ mapJoinRemoved = false;
+ hasMapGroupBy = false;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1623931&r1=1623930&r2=1623931&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Wed Sep 10 07:44:58 2014
@@ -346,48 +346,6 @@ public class ColumnStatsSemanticAnalyzer
return rewrittenTree;
}
- public ColumnStatsSemanticAnalyzer(HiveConf conf, ASTNode tree) throws SemanticException {
- super(conf);
- // check if it is no scan. grammar prevents coexit noscan/columns
- super.processNoScanCommand(tree);
- // check if it is partial scan. grammar prevents coexit partialscan/columns
- super.processPartialScanCommand(tree);
- /* Rewrite only analyze table <> column <> compute statistics; Don't rewrite analyze table
- * command - table stats are collected by the table scan operator and is not rewritten to
- * an aggregation.
- */
- if (shouldRewrite(tree)) {
- tbl = getTable(tree);
- colNames = getColumnName(tree);
- // Save away the original AST
- originalTree = tree;
- boolean isPartitionStats = isPartitionLevelStats(tree);
- Map<String,String> partSpec = null;
- checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
- validateSpecifiedColumnNames(colNames);
- if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) {
- isPartitionStats = true;
- }
-
- if (isPartitionStats) {
- isTableLevel = false;
- partSpec = getPartKeyValuePairsFromAST(tree);
- handlePartialPartitionSpec(partSpec);
- } else {
- isTableLevel = true;
- }
- colType = getColumnTypes(colNames);
- int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
- rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats);
- rewrittenTree = genRewrittenTree(rewrittenQuery);
- } else {
- // Not an analyze table column compute statistics statement - don't do any rewrites
- originalTree = rewrittenTree = tree;
- rewrittenQuery = null;
- isRewritten = false;
- }
- }
-
// fail early if the columns specified for column statistics are not valid
private void validateSpecifiedColumnNames(List<String> specifiedCols)
throws SemanticException {
@@ -421,6 +379,46 @@ public class ColumnStatsSemanticAnalyzer
// initialize QB
init();
+ // check if it is no scan. grammar prevents coexist noscan/columns
+ super.processNoScanCommand(ast);
+ // check if it is partial scan. grammar prevents coexist partialscan/columns
+ super.processPartialScanCommand(ast);
+ /* Rewrite only analyze table <> column <> compute statistics; Don't rewrite analyze table
+ * command - table stats are collected by the table scan operator and is not rewritten to
+ * an aggregation.
+ */
+ if (shouldRewrite(ast)) {
+ tbl = getTable(ast);
+ colNames = getColumnName(ast);
+ // Save away the original AST
+ originalTree = ast;
+ boolean isPartitionStats = isPartitionLevelStats(ast);
+ Map<String,String> partSpec = null;
+ checkForPartitionColumns(
+ colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
+ validateSpecifiedColumnNames(colNames);
+ if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) {
+ isPartitionStats = true;
+ }
+
+ if (isPartitionStats) {
+ isTableLevel = false;
+ partSpec = getPartKeyValuePairsFromAST(ast);
+ handlePartialPartitionSpec(partSpec);
+ } else {
+ isTableLevel = true;
+ }
+ colType = getColumnTypes(colNames);
+ int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
+ rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats);
+ rewrittenTree = genRewrittenTree(rewrittenQuery);
+ } else {
+ // Not an analyze table column compute statistics statement - don't do any rewrites
+ originalTree = rewrittenTree = ast;
+ rewrittenQuery = null;
+ isRewritten = false;
+ }
+
// Setup the necessary metadata if originating from analyze rewrite
if (isRewritten) {
qb = getQB();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java?rev=1623931&r1=1623930&r2=1623931&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java Wed Sep 10 07:44:58 2014
@@ -25,10 +25,14 @@ import org.apache.hadoop.hive.ql.plan.Li
*/
public class GlobalLimitCtx {
- private boolean enable = false;
- private int globalLimit = -1;
- private boolean hasTransformOrUDTF = false;
- private LimitDesc lastReduceLimitDesc = null;
+ private boolean enable;
+ private int globalLimit;
+ private boolean hasTransformOrUDTF;
+ private LimitDesc lastReduceLimitDesc;
+
+ public GlobalLimitCtx() {
+ reset();
+ }
public int getGlobalLimit() {
return globalLimit;
@@ -64,4 +68,11 @@ public class GlobalLimitCtx {
this.globalLimit = -1;
this.lastReduceLimitDesc = null;
}
+
+ public void reset() {
+ enable = false;
+ globalLimit = -1;
+ hasTransformOrUDTF = false;
+ lastReduceLimitDesc = null;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1623931&r1=1623930&r2=1623931&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Sep 10 07:44:58 2014
@@ -206,6 +206,8 @@ public class SemanticAnalyzer extends Ba
public static final String DUMMY_DATABASE = "_dummy_database";
public static final String DUMMY_TABLE = "_dummy_table";
+ // Max characters when auto generating the column name with func name
+ private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
@@ -228,7 +230,7 @@ public class SemanticAnalyzer extends Ba
private HashMap<TableScanOperator, sampleDesc> opToSamplePruner;
private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
/**
- * a map for the split sampling, from ailias to an instance of SplitSample
+ * a map for the split sampling, from alias to an instance of SplitSample
* that describes percentage and number.
*/
private final HashMap<String, SplitSample> nameToSplitSample;
@@ -239,7 +241,7 @@ public class SemanticAnalyzer extends Ba
private ArrayList<String> viewsExpanded;
private ASTNode viewSelect;
private final UnparseTranslator unparseTranslator;
- private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx();
+ private final GlobalLimitCtx globalLimitCtx;
// prefix for column names auto generated by hive
private final String autogenColAliasPrfxLbl;
@@ -250,16 +252,13 @@ public class SemanticAnalyzer extends Ba
// keeps track of aliases for V3, V3:V2, V3:V2:V1.
// This is used when T is added as an input for the query, the parents of T is
// derived from the alias V3:V2:V1:T
- private final Map<String, ReadEntity> viewAliasToInput = new HashMap<String, ReadEntity>();
-
- // Max characters when auto generating the column name with func name
- private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
+ private final Map<String, ReadEntity> viewAliasToInput;
// flag for no scan during analyze ... compute statistics
- protected boolean noscan = false;
+ protected boolean noscan;
//flag for partial scan during analyze ... compute statistics
- protected boolean partialscan = false;
+ protected boolean partialscan;
/*
* Capture the CTE definitions in a Query.
@@ -276,7 +275,6 @@ public class SemanticAnalyzer extends Ba
}
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
-
super(conf);
opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
@@ -306,6 +304,9 @@ public class SemanticAnalyzer extends Ba
queryProperties = new QueryProperties();
opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
aliasToCTEs = new HashMap<String, ASTNode>();
+ globalLimitCtx = new GlobalLimitCtx();
+ viewAliasToInput = new HashMap<String, ReadEntity>();
+ noscan = partialscan = false;
}
@Override
@@ -326,6 +327,29 @@ public class SemanticAnalyzer extends Ba
groupOpToInputTables.clear();
prunedPartitions.clear();
aliasToCTEs.clear();
+ topToTable.clear();
+ opToPartPruner.clear();
+ opToPartList.clear();
+ opToPartToSkewedPruner.clear();
+ opToSamplePruner.clear();
+ nameToSplitSample.clear();
+ fsopToTable.clear();
+ resultSchema = null;
+ createVwDesc = null;
+ viewsExpanded = null;
+ viewSelect = null;
+ ctesExpanded = null;
+ noscan = false;
+ partialscan = false;
+ globalLimitCtx.disableOpt();
+ viewAliasToInput.clear();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
+ topToTableProps.clear();
+ listMapJoinOpsNoReducer.clear();
+ unparseTranslator.clear();
+ queryProperties.clear();
+ outputs.clear();
+ globalLimitCtx.reset();
}
public void initParseCtx(ParseContext pctx) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=1623931&r1=1623930&r2=1623931&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Wed Sep 10 07:44:58 2014
@@ -263,7 +263,7 @@ public final class SemanticAnalyzerFacto
return new FunctionSemanticAnalyzer(conf);
case HiveParser.TOK_ANALYZE:
- return new ColumnStatsSemanticAnalyzer(conf, tree);
+ return new ColumnStatsSemanticAnalyzer(conf);
case HiveParser.TOK_CREATEMACRO:
case HiveParser.TOK_DROPMACRO:
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java?rev=1623931&r1=1623930&r2=1623931&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java Wed Sep 10 07:44:58 2014
@@ -262,4 +262,10 @@ class UnparseTranslator {
ASTNode targetNode;
ASTNode sourceNode;
}
+
+ public void clear() {
+ translations.clear();
+ copyTranslations.clear();
+ enabled = false;
+ }
}