You are viewing a plain-text version of this content; the canonical link was provided in the original HTML version of this message.
Posted to commits@hive.apache.org by br...@apache.org on 2014/04/27 20:48:45 UTC

svn commit: r1590452 - in /hive/trunk: ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/org/...

Author: brock
Date: Sun Apr 27 18:48:44 2014
New Revision: 1590452

URL: http://svn.apache.org/r1590452
Log:
HIVE-6785 - query fails when a partitioned table's table-level SerDe is ParquetHiveSerDe and the partition-level SerDe is a different SerDe (Tongji via Brock)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q
    hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1590452&r1=1590451&r2=1590452&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Sun Apr 27 18:48:44 2014
@@ -136,6 +136,13 @@ public class ArrayWritableObjectInspecto
       return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
     }
 
+    //since setStructFieldData and create return a list, getStructFieldData should be able to
+    //handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
+    //is something else.
+    if (data instanceof List) {
+      return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
+    }
+
     throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java?rev=1590452&r1=1590451&r2=1590452&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java Sun Apr 27 18:48:44 2014
@@ -14,8 +14,7 @@
 package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
 
 import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
 import org.apache.hadoop.io.Text;
 
@@ -25,10 +24,10 @@ import parquet.io.api.Binary;
  * The ParquetStringInspector inspects a BinaryWritable to give a Text or String.
  *
  */
-public class ParquetStringInspector extends AbstractPrimitiveJavaObjectInspector implements SettableStringObjectInspector {
+public class ParquetStringInspector extends JavaStringObjectInspector implements SettableStringObjectInspector {
 
   ParquetStringInspector() {
-    super(TypeInfoFactory.stringTypeInfo);
+    super();
   }
 
   @Override

Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q?rev=1590452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q Sun Apr 27 18:48:44 2014
@@ -0,0 +1,30 @@
+DROP TABLE if exists parquet_mixed_fileformat;
+
+CREATE TABLE parquet_mixed_fileformat (
+    id int,
+    str string,
+    part string
+) PARTITIONED BY (dateint int)
+ ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+---- partition dateint=20140330 is stored as TEXTFILE
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_mixed_fileformat PARTITION (dateint=20140330);
+
+SELECT * FROM parquet_mixed_fileformat;
+
+DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330);
+
+---change table serde and file format to PARQUET----
+
+ALTER TABLE parquet_mixed_fileformat set SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe';
+ALTER TABLE parquet_mixed_fileformat
+     SET FILEFORMAT
+     INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';
+
+DESCRIBE FORMATTED parquet_mixed_fileformat;
+DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330);
+
+SELECT * FROM parquet_mixed_fileformat;

Added: hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out?rev=1590452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out Sun Apr 27 18:48:44 2014
@@ -0,0 +1,212 @@
+PREHOOK: query: DROP TABLE if exists parquet_mixed_fileformat
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists parquet_mixed_fileformat
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_mixed_fileformat (
+    id int,
+    str string,
+    part string
+) PARTITIONED BY (dateint int)
+ ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_mixed_fileformat (
+    id int,
+    str string,
+    part string
+) PARTITIONED BY (dateint int)
+ ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_mixed_fileformat
+PREHOOK: query: ---- partition dateint=20140330 is stored as TEXTFILE
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_mixed_fileformat PARTITION (dateint=20140330)
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: query: ---- partition dateint=20140330 is stored as TEXTFILE
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_mixed_fileformat PARTITION (dateint=20140330)
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: Output: default@parquet_mixed_fileformat@dateint=20140330
+PREHOOK: query: SELECT * FROM parquet_mixed_fileformat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_mixed_fileformat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+1	foo	part1	20140330
+2	bar	part2	20140330
+3	baz	part2	20140330
+PREHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_mixed_fileformat
+# col_name            	data_type           	comment             
+	 	 
+id                  	int                 	                    
+str                 	string              	                    
+part                	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+dateint             	int                 	                    
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[20140330]          	 
+Database:           	default             	 
+Table:              	parquet_mixed_fileformat	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+	COLUMN_STATS_ACCURATE	true                
+	numFiles            	1                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	totalSize           	36                  
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	field.delim         	|                   
+	serialization.format	|                   
+PREHOOK: query: ---change table serde and file format to PARQUET----
+
+ALTER TABLE parquet_mixed_fileformat set SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: query: ---change table serde and file format to PARQUET----
+
+ALTER TABLE parquet_mixed_fileformat set SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Output: default@parquet_mixed_fileformat
+PREHOOK: query: ALTER TABLE parquet_mixed_fileformat
+     SET FILEFORMAT
+     INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: query: ALTER TABLE parquet_mixed_fileformat
+     SET FILEFORMAT
+     INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Output: default@parquet_mixed_fileformat
+PREHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_mixed_fileformat
+# col_name            	data_type           	comment             
+	 	 
+id                  	int                 	                    
+str                 	string              	                    
+part                	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+dateint             	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	field.delim         	|                   
+	serialization.format	|                   
+PREHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_mixed_fileformat
+# col_name            	data_type           	comment             
+	 	 
+id                  	int                 	                    
+str                 	string              	                    
+part                	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+dateint             	int                 	                    
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[20140330]          	 
+Database:           	default             	 
+Table:              	parquet_mixed_fileformat	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+	COLUMN_STATS_ACCURATE	true                
+	numFiles            	1                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	totalSize           	36                  
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	field.delim         	|                   
+	serialization.format	|                   
+PREHOOK: query: SELECT * FROM parquet_mixed_fileformat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_mixed_fileformat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+1	foo	part1	20140330
+2	bar	part2	20140330
+3	baz	part2	20140330

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java?rev=1590452&r1=1590451&r2=1590452&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java Sun Apr 27 18:48:44 2014
@@ -27,7 +27,7 @@ public class JavaStringObjectInspector e
     AbstractPrimitiveJavaObjectInspector implements
     SettableStringObjectInspector {
 
-  JavaStringObjectInspector() {
+  protected JavaStringObjectInspector() {
     super(TypeInfoFactory.stringTypeInfo);
   }