You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by st...@apache.org on 2018/03/14 16:46:21 UTC
hive git commit: HIVE-18343: Remove LinkedList from ColumnStatsSemanticAnalyzer.java (BELUGA BEHR, reviewed by Sahil Takiar)
Repository: hive
Updated Branches:
refs/heads/master db4fe384f -> 9cdc08580
HIVE-18343: Remove LinkedList from ColumnStatsSemanticAnalyzer.java (BELUGA BEHR, reviewed by Sahil Takiar)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9cdc0858
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9cdc0858
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9cdc0858
Branch: refs/heads/master
Commit: 9cdc08580ebecd77f85169b70e1529e3be35db8c
Parents: db4fe38
Author: BELUGA BEHR <da...@gmail.com>
Authored: Wed Mar 14 09:45:55 2018 -0700
Committer: Sahil Takiar <st...@cloudera.com>
Committed: Wed Mar 14 09:45:55 2018 -0700
----------------------------------------------------------------------
.../ql/parse/ColumnStatsSemanticAnalyzer.java | 112 +++++++++----------
1 file changed, 54 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/9cdc0858/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index c97e2a9..2780be2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -20,12 +20,9 @@ package org.apache.hadoop.hive.ql.parse;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -45,6 +42,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* ColumnStatsSemanticAnalyzer.
* Handles semantic analysis and rewrite for gathering column statistics both at the level of a
@@ -54,7 +54,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
private static final Logger LOG = LoggerFactory
.getLogger(ColumnStatsSemanticAnalyzer.class);
- static final private LogHelper console = new LogHelper(LOG);
+ private static final LogHelper CONSOLE = new LogHelper(LOG);
private ASTNode originalTree;
private ASTNode rewrittenTree;
@@ -90,25 +90,25 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
return rwt;
}
- private List<String> getColumnName(ASTNode tree) throws SemanticException{
+ private List<String> getColumnName(ASTNode tree) throws SemanticException {
switch (tree.getChildCount()) {
- case 2:
- return Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
- case 3:
- int numCols = tree.getChild(2).getChildCount();
- List<String> colName = new LinkedList<String>();
- for (int i = 0; i < numCols; i++) {
- colName.add(i, new String(getUnescapedName((ASTNode) tree.getChild(2).getChild(i))));
- }
- return colName;
- default:
- throw new SemanticException("Internal error. Expected number of children of ASTNode to be"
- + " either 2 or 3. Found : " + tree.getChildCount());
+ case 2:
+ return Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
+ case 3:
+ int numCols = tree.getChild(2).getChildCount();
+ List<String> colName = new ArrayList<String>(numCols);
+ for (int i = 0; i < numCols; i++) {
+ colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i)));
+ }
+ return colName;
+ default:
+ throw new SemanticException("Internal error. Expected number of children of ASTNode to be"
+ + " either 2 or 3. Found : " + tree.getChildCount());
}
}
- private void handlePartialPartitionSpec(Map<String,String> partSpec, ColumnStatsAutoGatherContext context) throws
+ private void handlePartialPartitionSpec(Map<String, String> partSpec, ColumnStatsAutoGatherContext context) throws
SemanticException {
// If user has fully specified partition, validate that partition exists
@@ -133,21 +133,21 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
// User might have only specified partial list of partition keys, in which case add other partition keys in partSpec
List<String> partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys());
- for (String partKey : partKeys){
- if(!partSpec.containsKey(partKey)) {
- partSpec.put(partKey, null);
- }
- }
-
- // Check if user have erroneously specified non-existent partitioning columns
- for (String partKey : partSpec.keySet()) {
- if(!partKeys.contains(partKey)){
- throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PART_KEY.getMsg() + " : " + partKey);
- }
- }
+ for (String partKey : partKeys) {
+ if (!partSpec.containsKey(partKey)) {
+ partSpec.put(partKey, null);
+ }
+ }
+
+ // Check if user have erroneously specified non-existent partitioning columns
+ for (String partKey : partSpec.keySet()) {
+ if (!partKeys.contains(partKey)) {
+ throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PART_KEY.getMsg() + " : " + partKey);
+ }
+ }
}
- private StringBuilder genPartitionClause(Map<String,String> partSpec) throws SemanticException {
+ private StringBuilder genPartitionClause(Map<String, String> partSpec) throws SemanticException {
StringBuilder whereClause = new StringBuilder(" where ");
boolean predPresent = false;
StringBuilder groupByClause = new StringBuilder(" group by ");
@@ -165,12 +165,12 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
}
}
- for (FieldSchema fs : tbl.getPartitionKeys()) {
- if (!aggPresent) {
- aggPresent = true;
- } else {
- groupByClause.append(",");
- }
+ for (FieldSchema fs : tbl.getPartitionKeys()) {
+ if (!aggPresent) {
+ aggPresent = true;
+ } else {
+ groupByClause.append(',');
+ }
groupByClause.append("`" + fs.getName() + "`");
}
@@ -178,7 +178,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
return predPresent ? whereClause.append(groupByClause) : groupByClause;
}
- private String genPartValueString (String partKey, String partVal) throws SemanticException {
+ private String genPartValueString(String partKey, String partVal) throws SemanticException {
String returnVal = partVal;
String partColType = getColTypeOf(partKey);
if (partColType.equals(serdeConstants.STRING_TYPE_NAME) ||
@@ -186,13 +186,13 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
partColType.contains(serdeConstants.CHAR_TYPE_NAME)) {
returnVal = "'" + escapeSQLString(partVal) + "'";
} else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) {
- returnVal = partVal+"Y";
+ returnVal = partVal + "Y";
} else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
- returnVal = partVal+"S";
+ returnVal = partVal + "S";
} else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) {
returnVal = partVal;
} else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) {
- returnVal = partVal+"L";
+ returnVal = partVal + "L";
} else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) {
returnVal = partVal + "BD";
} else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) ||
@@ -206,22 +206,21 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
return returnVal;
}
- private String getColTypeOf (String partKey) throws SemanticException{
+ private String getColTypeOf(String partKey) throws SemanticException{
for (FieldSchema fs : tbl.getPartitionKeys()) {
if (partKey.equalsIgnoreCase(fs.getName())) {
return fs.getType().toLowerCase();
}
}
- throw new SemanticException ("Unknown partition key : " + partKey);
+ throw new SemanticException("Unknown partition key : " + partKey);
}
private List<String> getColumnTypes(List<String> colNames)
throws SemanticException{
List<String> colTypes = new ArrayList<String>();
List<FieldSchema> cols = tbl.getCols();
- List<String> copyColNames = new ArrayList<>();
- copyColNames.addAll(colNames);
+ List<String> copyColNames = new ArrayList<>(colNames);
for (String colName : copyColNames) {
for (FieldSchema col : cols) {
@@ -245,10 +244,9 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
return colName.replaceAll("`", "``");
}
- private String genRewrittenQuery(List<String> colNames, HiveConf conf, Map<String,String> partSpec,
- boolean isPartitionStats) throws SemanticException{
+ private String genRewrittenQuery(List<String> colNames, HiveConf conf, Map<String, String> partSpec,
+ boolean isPartitionStats) throws SemanticException{
StringBuilder rewrittenQueryBuilder = new StringBuilder("select ");
- String rewrittenQuery;
for (int i = 0; i < colNames.size(); i++) {
if (i > 0) {
@@ -258,7 +256,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
rewrittenQueryBuilder.append("compute_stats(`");
rewrittenQueryBuilder.append(escapeBackTicks(colNames.get(i)));
rewrittenQueryBuilder.append("`, '" + func + "'");
- if (func.equals("fm")) {
+ if ("fm".equals(func)) {
int numBitVectors = 0;
try {
numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
@@ -267,7 +265,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
}
rewrittenQueryBuilder.append(", " + numBitVectors);
}
- rewrittenQueryBuilder.append(")");
+ rewrittenQueryBuilder.append(')');
}
if (isPartitionStats) {
@@ -283,11 +281,11 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
// If partition level statistics is requested, add predicate and group by as needed to rewritten
// query
- if (isPartitionStats) {
+ if (isPartitionStats) {
rewrittenQueryBuilder.append(genPartitionClause(partSpec));
}
- rewrittenQuery = rewrittenQueryBuilder.toString();
+ String rewrittenQuery = rewrittenQueryBuilder.toString();
rewrittenQuery = new VariableSubstitution(new HiveVariableSource() {
@Override
public Map<String, String> getHiveVariable() {
@@ -298,7 +296,6 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
}
private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException {
- ASTNode rewrittenTree;
// Parse the rewritten query string
try {
ctx = new Context(conf);
@@ -308,18 +305,17 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
ctx.setCmd(rewrittenQuery);
try {
- rewrittenTree = ParseUtils.parse(rewrittenQuery, ctx);
+ return ParseUtils.parse(rewrittenQuery, ctx);
} catch (ParseException e) {
throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_PARSE_ERROR.getMsg());
}
- return rewrittenTree;
}
// fail early if the columns specified for column statistics are not valid
private void validateSpecifiedColumnNames(List<String> specifiedCols)
throws SemanticException {
List<String> tableCols = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
- for(String sc : specifiedCols) {
+ for (String sc : specifiedCols) {
if (!tableCols.contains(sc.toLowerCase())) {
String msg = "'" + sc + "' (possible columns are " + tableCols.toString() + ")";
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(msg));
@@ -344,7 +340,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
String warning = "Only primitive type arguments are accepted but " + colType
+ " is passed for " + colName + ".";
warning = "WARNING: " + warning;
- console.printInfo(warning);
+ CONSOLE.printInfo(warning);
}
@Override
@@ -367,7 +363,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
// Save away the original AST
originalTree = ast;
boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast);
- Map<String,String> partSpec = null;
+ Map<String, String> partSpec = null;
checkForPartitionColumns(
colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
validateSpecifiedColumnNames(colNames);