You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/11/30 18:55:46 UTC
hive git commit: HIVE-15311: Analyze column stats should skip non-primitive column types (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 97c3fb396 -> bb9cae67c
HIVE-15311: Analyze column stats should skip non-primitive column types (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb9cae67
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb9cae67
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb9cae67
Branch: refs/heads/master
Commit: bb9cae67ce4ab41af3d14999dd0ceb6697a27617
Parents: 97c3fb3
Author: Pengcheng Xiong <px...@apache.org>
Authored: Wed Nov 30 10:55:13 2016 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Wed Nov 30 10:55:23 2016 -0800
----------------------------------------------------------------------
.../ql/parse/ColumnStatsSemanticAnalyzer.java | 38 ++++++++--
.../clientpositive/partial_column_stats.q | 9 +++
.../columnstats_tbllvl_complex_type.q.out | 2 +-
.../clientpositive/partial_column_stats.q.out | 74 ++++++++++++++++++++
4 files changed, 118 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index ab131e2..ff07b42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.parse;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -36,8 +37,14 @@ import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.session.OperationLog;
+import org.apache.hadoop.hive.ql.session.OperationLog.LoggingLevel;
import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
/**
* ColumnStatsSemanticAnalyzer.
@@ -48,6 +55,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
private static final Logger LOG = LoggerFactory
.getLogger(ColumnStatsSemanticAnalyzer.class);
+ static final private LogHelper console = new LogHelper(LOG);
private ASTNode originalTree;
private ASTNode rewrittenTree;
@@ -211,16 +219,26 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
private List<String> getColumnTypes(List<String> colNames)
throws SemanticException{
- List<String> colTypes = new LinkedList<String>();
+ List<String> colTypes = new ArrayList<String>();
List<FieldSchema> cols = tbl.getCols();
+ List<String> copyColNames = new ArrayList<>();
+ copyColNames.addAll(colNames);
- for (String colName : colNames) {
- for (FieldSchema col: cols) {
+ for (String colName : copyColNames) {
+ for (FieldSchema col : cols) {
if (colName.equalsIgnoreCase(col.getName())) {
- colTypes.add(new String(col.getType()));
+ String type = col.getType();
+ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
+ if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ logTypeWarning(colName, type);
+ colNames.remove(colName);
+ } else {
+ colTypes.add(type);
+ }
}
}
}
+
return colTypes;
}
@@ -312,6 +330,18 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
}
}
+ private void logTypeWarning(String colName, String colType) {
+ String warning = "Only primitive type arguments are accepted but " + colType
+ + " is passed for " + colName + ".";
+ warning = "WARNING: " + warning;
+ console.printInfo(warning);
+ // Propagate warning to beeline via operation log.
+ OperationLog ol = OperationLog.getCurrentOperationLog();
+ if (ol != null) {
+ ol.writeOperationLog(LoggingLevel.EXECUTION, warning + "\n");
+ }
+ }
+
@Override
public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
QB qb;
http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/test/queries/clientpositive/partial_column_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partial_column_stats.q b/ql/src/test/queries/clientpositive/partial_column_stats.q
new file mode 100644
index 0000000..8ff65ac
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partial_column_stats.q
@@ -0,0 +1,9 @@
+set hive.mapred.mode=nonstrict;
+
+create table t1 (key int, data struct<name:string, id: string>, value string);
+
+explain analyze table t1 compute statistics for columns;
+
+analyze table t1 compute statistics for columns;
+
+desc formatted t1 value;
http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out b/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
index 0bb1a0d..8956bea 100644
--- a/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
+++ b/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
@@ -28,4 +28,4 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/create_nested_type.txt
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@table_complex_type
-FAILED: UDFArgumentTypeException Only primitive type arguments are accepted but map<string,array<string>> is passed.
+FAILED: SemanticException [Error 30009]: Encountered parse error while parsing rewritten query
http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/test/results/clientpositive/partial_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out
new file mode 100644
index 0000000..59b52b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: create table t1 (key int, data struct<name:string, id: string>, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (key int, data struct<name:string, id: string>, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.t1
+
+PREHOOK: query: analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted t1 value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: desc formatted t1 value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 0 0.0 0 from deserializer