You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/09/12 19:30:47 UTC
svn commit: r1624596 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
test/queries/clientpositive/stats_only_null.q
test/results/clientpositive/stats_only_null.q.out
Author: hashutosh
Date: Fri Sep 12 17:30:46 2014
New Revision: 1624596
URL: http://svn.apache.org/r1624596
Log:
HIVE-8062 : Stats collection for columns fails on a partitioned table with null values in partitioning column (Ashutosh Chauhan via Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java Fri Sep 12 17:30:46 2014
@@ -28,6 +28,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
@@ -305,8 +306,10 @@ public class ColumnStatsTask extends Tas
List<String> partVals = new ArrayList<String>();
// Iterate over partition columns to figure out partition name
for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
- partVals.add(((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
- getPrimitiveJavaObject(list.get(i)).toString());
+ Object partVal = ((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
+ getPrimitiveJavaObject(list.get(i));
+ partVals.add(partVal == null ? // could be null for default partition
+ this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
}
partName = Warehouse.makePartName(partColSchema, partVals);
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q Fri Sep 12 17:30:46 2014
@@ -34,6 +34,17 @@ select count(*), count(a), count(b), cou
select count(*), count(a), count(b), count(c), count(d) from stats_null;
select count(*), count(a), count(b), count(c), count(d) from stats_null_part;
+
+drop table stats_null_part;
+set hive.exec.dynamic.partition.mode=nonstrict;
+CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE;
+
+insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null ;
+analyze table stats_null_part compute statistics for columns;
+
+describe formatted stats_null_part.a partition(dt = 1);
+
+reset hive.exec.dynamic.partition.mode;
drop table stats_null;
drop table stats_null_part;
drop table temps_null;
Modified: hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out Fri Sep 12 17:30:46 2014
@@ -334,6 +334,60 @@ POSTHOOK: query: select count(*), count(
POSTHOOK: type: QUERY
#### A masked pattern was here ####
10 8 8 10 10
+PREHOOK: query: drop table stats_null_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@stats_null_part
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: drop table stats_null_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@stats_null_part
+POSTHOOK: Output: default@stats_null_part
+PREHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) partitioned by (dt int) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@stats_null_part
+PREHOOK: query: insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@temps_null
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: insert into table stats_null_part partition(dt) select a,b,c,d,b from temps_null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@temps_null
+POSTHOOK: Output: default@stats_null_part@dt=1
+POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).a SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).a SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).b SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).c SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).d SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ]
+PREHOOK: query: analyze table stats_null_part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_null_part
+PREHOOK: Input: default@stats_null_part@dt=1
+PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table stats_null_part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_null_part
+POSTHOOK: Input: default@stats_null_part@dt=1
+POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted stats_null_part.a partition(dt = 1)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@stats_null_part
+POSTHOOK: query: describe formatted stats_null_part.a partition(dt = 1)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@stats_null_part
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+a double 1.0 1.0 1 1 from deserializer
PREHOOK: query: drop table stats_null
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_null