You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/04/27 20:48:45 UTC
svn commit: r1590452 - in /hive/trunk:
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/
Author: brock
Date: Sun Apr 27 18:48:44 2014
New Revision: 1590452
URL: http://svn.apache.org/r1590452
Log:
HIVE-6785 - query fails when partitioned table's table level serde is ParquetHiveSerDe and partition level serde is of different SerDe (Tongji via Brock)
Added:
hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q
hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1590452&r1=1590451&r2=1590452&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Sun Apr 27 18:48:44 2014
@@ -136,6 +136,13 @@ public class ArrayWritableObjectInspecto
return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
}
+ //since setStructFieldData and create return a list, getStructFieldData should be able to
+ //handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
+ //is something else.
+ if (data instanceof List) {
+ return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
+ }
+
throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java?rev=1590452&r1=1590451&r2=1590452&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java Sun Apr 27 18:48:44 2014
@@ -14,8 +14,7 @@
package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
import org.apache.hadoop.io.Text;
@@ -25,10 +24,10 @@ import parquet.io.api.Binary;
* The ParquetStringInspector inspects a BinaryWritable to give a Text or String.
*
*/
-public class ParquetStringInspector extends AbstractPrimitiveJavaObjectInspector implements SettableStringObjectInspector {
+public class ParquetStringInspector extends JavaStringObjectInspector implements SettableStringObjectInspector {
ParquetStringInspector() {
- super(TypeInfoFactory.stringTypeInfo);
+ super();
}
@Override
Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q?rev=1590452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_serde.q Sun Apr 27 18:48:44 2014
@@ -0,0 +1,30 @@
+DROP TABLE if exists parquet_mixed_fileformat;
+
+CREATE TABLE parquet_mixed_fileformat (
+ id int,
+ str string,
+ part string
+) PARTITIONED BY (dateint int)
+ ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+---- partition dateint=20140330 is stored as TEXTFILE
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_mixed_fileformat PARTITION (dateint=20140330);
+
+SELECT * FROM parquet_mixed_fileformat;
+
+DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330);
+
+---change table serde and file format to PARQUET----
+
+ALTER TABLE parquet_mixed_fileformat set SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe';
+ALTER TABLE parquet_mixed_fileformat
+ SET FILEFORMAT
+ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';
+
+DESCRIBE FORMATTED parquet_mixed_fileformat;
+DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330);
+
+SELECT * FROM parquet_mixed_fileformat;
Added: hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out?rev=1590452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_serde.q.out Sun Apr 27 18:48:44 2014
@@ -0,0 +1,212 @@
+PREHOOK: query: DROP TABLE if exists parquet_mixed_fileformat
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists parquet_mixed_fileformat
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_mixed_fileformat (
+ id int,
+ str string,
+ part string
+) PARTITIONED BY (dateint int)
+ ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE parquet_mixed_fileformat (
+ id int,
+ str string,
+ part string
+) PARTITIONED BY (dateint int)
+ ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_mixed_fileformat
+PREHOOK: query: ---- partition dateint=20140330 is stored as TEXTFILE
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_mixed_fileformat PARTITION (dateint=20140330)
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: query: ---- partition dateint=20140330 is stored as TEXTFILE
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_mixed_fileformat PARTITION (dateint=20140330)
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: Output: default@parquet_mixed_fileformat@dateint=20140330
+PREHOOK: query: SELECT * FROM parquet_mixed_fileformat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_mixed_fileformat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+1 foo part1 20140330
+2 bar part2 20140330
+3 baz part2 20140330
+PREHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_mixed_fileformat
+# col_name data_type comment
+
+id int
+str string
+part string
+
+# Partition Information
+# col_name data_type comment
+
+dateint int
+
+# Detailed Partition Information
+Partition Value: [20140330]
+Database: default
+Table: parquet_mixed_fileformat
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 36
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ field.delim |
+ serialization.format |
+PREHOOK: query: ---change table serde and file format to PARQUET----
+
+ALTER TABLE parquet_mixed_fileformat set SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: query: ---change table serde and file format to PARQUET----
+
+ALTER TABLE parquet_mixed_fileformat set SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Output: default@parquet_mixed_fileformat
+PREHOOK: query: ALTER TABLE parquet_mixed_fileformat
+ SET FILEFORMAT
+ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Output: default@parquet_mixed_fileformat
+POSTHOOK: query: ALTER TABLE parquet_mixed_fileformat
+ SET FILEFORMAT
+ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Output: default@parquet_mixed_fileformat
+PREHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_mixed_fileformat
+# col_name data_type comment
+
+id int
+str string
+part string
+
+# Partition Information
+# col_name data_type comment
+
+dateint int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ field.delim |
+ serialization.format |
+PREHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: query: DESCRIBE FORMATTED parquet_mixed_fileformat PARTITION (dateint=20140330)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_mixed_fileformat
+# col_name data_type comment
+
+id int
+str string
+part string
+
+# Partition Information
+# col_name data_type comment
+
+dateint int
+
+# Detailed Partition Information
+Partition Value: [20140330]
+Database: default
+Table: parquet_mixed_fileformat
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 36
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ field.delim |
+ serialization.format |
+PREHOOK: query: SELECT * FROM parquet_mixed_fileformat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_mixed_fileformat
+PREHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_mixed_fileformat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_mixed_fileformat
+POSTHOOK: Input: default@parquet_mixed_fileformat@dateint=20140330
+#### A masked pattern was here ####
+1 foo part1 20140330
+2 bar part2 20140330
+3 baz part2 20140330
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java?rev=1590452&r1=1590451&r2=1590452&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java Sun Apr 27 18:48:44 2014
@@ -27,7 +27,7 @@ public class JavaStringObjectInspector e
AbstractPrimitiveJavaObjectInspector implements
SettableStringObjectInspector {
- JavaStringObjectInspector() {
+ protected JavaStringObjectInspector() {
super(TypeInfoFactory.stringTypeInfo);
}