Posted to commits@hive.apache.org by px...@apache.org on 2016/11/30 18:55:46 UTC

hive git commit: HIVE-15311: Analyze column stats should skip non-primitive column types (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 97c3fb396 -> bb9cae67c


HIVE-15311: Analyze column stats should skip non-primitive column types (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb9cae67
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb9cae67
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb9cae67

Branch: refs/heads/master
Commit: bb9cae67ce4ab41af3d14999dd0ceb6697a27617
Parents: 97c3fb3
Author: Pengcheng Xiong <px...@apache.org>
Authored: Wed Nov 30 10:55:13 2016 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Wed Nov 30 10:55:23 2016 -0800

----------------------------------------------------------------------
 .../ql/parse/ColumnStatsSemanticAnalyzer.java   | 38 ++++++++--
 .../clientpositive/partial_column_stats.q       |  9 +++
 .../columnstats_tbllvl_complex_type.q.out       |  2 +-
 .../clientpositive/partial_column_stats.q.out   | 74 ++++++++++++++++++++
 4 files changed, 118 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index ab131e2..ff07b42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -36,8 +37,14 @@ import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.session.OperationLog;
+import org.apache.hadoop.hive.ql.session.OperationLog.LoggingLevel;
 import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * ColumnStatsSemanticAnalyzer.
@@ -48,6 +55,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
 public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
   private static final Logger LOG = LoggerFactory
       .getLogger(ColumnStatsSemanticAnalyzer.class);
+  static final private LogHelper console = new LogHelper(LOG);
 
   private ASTNode originalTree;
   private ASTNode rewrittenTree;
@@ -211,16 +219,26 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
 
   private List<String> getColumnTypes(List<String> colNames)
       throws SemanticException{
-    List<String> colTypes = new LinkedList<String>();
+    List<String> colTypes = new ArrayList<String>();
     List<FieldSchema> cols = tbl.getCols();
+    List<String> copyColNames = new ArrayList<>();
+    copyColNames.addAll(colNames);
 
-    for (String colName : colNames) {
-      for (FieldSchema col: cols) {
+    for (String colName : copyColNames) {
+      for (FieldSchema col : cols) {
         if (colName.equalsIgnoreCase(col.getName())) {
-          colTypes.add(new String(col.getType()));
+          String type = col.getType();
+          TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
+          if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            logTypeWarning(colName, type);
+            colNames.remove(colName);
+          } else {
+            colTypes.add(type);
+          }
         }
       }
     }
+    
     return colTypes;
   }
 
@@ -312,6 +330,18 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
     }
   }
 
+  private void logTypeWarning(String colName, String colType) {
+    String warning = "Only primitive type arguments are accepted but " + colType
+        + " is passed for " + colName + ".";
+    warning = "WARNING: " + warning;
+    console.printInfo(warning);
+    // Propagate warning to beeline via operation log.
+    OperationLog ol = OperationLog.getCurrentOperationLog();
+    if (ol != null) {
+      ol.writeOperationLog(LoggingLevel.EXECUTION, warning + "\n");
+    }
+  }
+
   @Override
   public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
     QB qb;
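
For readers skimming the hunks above: the heart of the change is in getColumnTypes, which now parses each column's type string into a TypeInfo and keeps only PRIMITIVE-category columns, warning about the rest. Note that the method iterates over a copy of colNames so that skipped columns can be removed from the original list while iterating. A minimal standalone sketch of that check follows (not part of the commit; the class name and hard-coded type strings are hypothetical, and only the serde2 classes already imported by the patch are assumed):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PrimitiveTypeFilterSketch {
  public static void main(String[] args) {
    // Type strings mirroring the columns of the test table t1 below.
    List<String> colTypes = Arrays.asList(
        "int", "struct<name:string,id:string>", "string");
    List<String> analyzed = new ArrayList<>();
    for (String type : colTypes) {
      // Parse the type string into a TypeInfo, as the patched method does.
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
      if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
        analyzed.add(type);
      } else {
        // The patch logs a warning and drops the column instead of failing.
        System.out.println("skipping non-primitive type: " + type);
      }
    }
    System.out.println(analyzed); // prints [int, string]
  }
}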

http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/test/queries/clientpositive/partial_column_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partial_column_stats.q b/ql/src/test/queries/clientpositive/partial_column_stats.q
new file mode 100644
index 0000000..8ff65ac
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partial_column_stats.q
@@ -0,0 +1,9 @@
+set hive.mapred.mode=nonstrict;
+
+create table t1 (key int, data struct<name:string, id: string>, value string);
+
+explain analyze table t1 compute statistics for columns;
+
+analyze table t1 compute statistics for columns;
+
+desc formatted t1 value;
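
With this patch, the struct column data in t1 is dropped from the rewritten query, so only key and value are analyzed (see the Column Stats Desc in the q.out further down). Judging from the string built in logTypeWarning above, the console warning should read roughly as follows (the exact spelling of the type may differ after normalization):

WARNING: Only primitive type arguments are accepted but struct<name:string,id:string> is passed for data.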

http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out b/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
index 0bb1a0d..8956bea 100644
--- a/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
+++ b/ql/src/test/results/clientnegative/columnstats_tbllvl_complex_type.q.out
@@ -28,4 +28,4 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/create_nested_type.txt
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@table_complex_type
-FAILED: UDFArgumentTypeException Only primitive type arguments are accepted but map<string,array<string>> is passed.
+FAILED: SemanticException [Error 30009]: Encountered parse error while parsing rewritten query
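
The expected failure changes here presumably because the map<string,array<string>> column is now filtered out before the query rewrite: with every requested column skipped, the rewritten statement is left with an empty select list and fails at parse time with error 30009, instead of reaching compute_stats and throwing UDFArgumentTypeException.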

http://git-wip-us.apache.org/repos/asf/hive/blob/bb9cae67/ql/src/test/results/clientpositive/partial_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out
new file mode 100644
index 0000000..59b52b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: create table t1 (key int, data struct<name:string, id: string>, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (key int, data struct<name:string, id: string>, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.t1
+
+PREHOOK: query: analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted t1 value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: desc formatted t1 value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
+	 	 	 	 	 	 	 	 	 	 
+value               	string              	                    	                    	0                   	0                   	0.0                 	0                   	                    	                    	from deserializer