Posted to commits@hive.apache.org by ha...@apache.org on 2014/09/12 19:30:47 UTC

svn commit: r1624596 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java test/queries/clientpositive/stats_only_null.q test/results/clientpositive/stats_only_null.q.out

Author: hashutosh
Date: Fri Sep 12 17:30:46 2014
New Revision: 1624596

URL: http://svn.apache.org/r1624596
Log:
HIVE-8062 : Stats collection for columns fails on a partitioned table with null values in partitioning column (Ashutosh Chauhan via Gunther Hagleitner)
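
The failure mode, roughly: ColumnStatsTask builds a partition name for each dynamically created partition, and when the partitioning column is NULL the row lands in the default partition, so the ObjectInspector yields a null Java object and the old unconditional toString() call throws a NullPointerException. A minimal sketch of that pre-fix behavior (names simplified, not the actual Hive code path):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Simplified stand-in for the pre-fix loop in ColumnStatsTask.
    public class PreFixPartitionName {
      public static void main(String[] args) {
        // The null models a row that fell into the default partition (__HIVE_DEFAULT_PARTITION__).
        List<Object> partitionValues = Arrays.asList((Object) 1, null);
        List<String> partVals = new ArrayList<String>();
        for (Object partVal : partitionValues) {
          partVals.add(partVal.toString()); // NullPointerException on the null value; the stats task aborts
        }
        System.out.println(partVals);
      }
    }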

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
    hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
    hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java Fri Sep 12 17:30:46 2014
@@ -28,6 +28,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
@@ -305,8 +306,10 @@ public class ColumnStatsTask extends Tas
         List<String> partVals = new ArrayList<String>();
         // Iterate over partition columns to figure out partition name
         for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
-          partVals.add(((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
-            getPrimitiveJavaObject(list.get(i)).toString());
+          Object partVal = ((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
+              getPrimitiveJavaObject(list.get(i));
+          partVals.add(partVal == null ? // could be null for default partition
+            this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
         }
         partName = Warehouse.makePartName(partColSchema, partVals);
       }
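
A minimal standalone sketch of the pattern applied above: when the extracted partition value is null, substitute the configured default partition name (hive.exec.default.partition.name, by default __HIVE_DEFAULT_PARTITION__) before assembling the partition name. The helper below is illustrative only; a constant stands in for the conf.getVar(ConfVars.DEFAULTPARTITIONNAME) lookup shown in the diff.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Illustrative only: a constant stands in for conf.getVar(ConfVars.DEFAULTPARTITIONNAME).
    public class DefaultPartitionNameSketch {
      static final String DEFAULT_PARTITION_NAME = "__HIVE_DEFAULT_PARTITION__";

      static List<String> toPartVals(List<Object> rawValues) {
        List<String> partVals = new ArrayList<String>();
        for (Object partVal : rawValues) {
          // could be null for the default partition
          partVals.add(partVal == null ? DEFAULT_PARTITION_NAME : partVal.toString());
        }
        return partVals;
      }

      public static void main(String[] args) {
        // prints [1, __HIVE_DEFAULT_PARTITION__]
        System.out.println(toPartVals(Arrays.asList((Object) 1, null)));
      }
    }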

Modified: hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q Fri Sep 12 17:30:46 2014
@@ -34,6 +34,17 @@ select count(*), count(a), count(b), cou
 
 select count(*), count(a), count(b), count(c), count(d) from stats_null;
 select count(*), count(a), count(b), count(c), count(d) from stats_null_part;
+
+drop table stats_null_part;
+set hive.exec.dynamic.partition.mode=nonstrict;
+CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE; 
+
+insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null ;
+analyze table stats_null_part compute statistics for columns;
+
+describe formatted stats_null_part.a partition(dt = 1);
+
+reset hive.exec.dynamic.partition.mode;
 drop table stats_null;
 drop table stats_null_part;
 drop table temps_null;

Modified: hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out Fri Sep 12 17:30:46 2014
@@ -334,6 +334,60 @@ POSTHOOK: query: select count(*), count(
 POSTHOOK: type: QUERY
 #### A masked pattern was here ####
 10	8	8	10	10
+PREHOOK: query: drop table stats_null_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@stats_null_part
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: drop table stats_null_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@stats_null_part
+POSTHOOK: Output: default@stats_null_part
+PREHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@stats_null_part
+PREHOOK: query: insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@temps_null
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@temps_null
+POSTHOOK: Output: default@stats_null_part@dt=1
+POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).a SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ]
+PREHOOK: query: analyze table stats_null_part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_null_part
+PREHOOK: Input: default@stats_null_part@dt=1
+PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table stats_null_part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_null_part
+POSTHOOK: Input: default@stats_null_part@dt=1
+POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted stats_null_part.a partition(dt = 1)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@stats_null_part
+POSTHOOK: query: describe formatted stats_null_part.a partition(dt = 1)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@stats_null_part
+# col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
+	 	 	 	 	 	 	 	 	 	 
+a                   	double              	1.0                 	1.0                 	1                   	1                   	                    	                    	                    	                    	from deserializer   
 PREHOOK: query: drop table stats_null
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@stats_null