You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2015/10/02 21:55:01 UTC

hive git commit: HIVE-11980 : Follow up on HIVE-11696, exception is thrown from CTAS from the table with table-level serde is Parquet while partition-level serde is JSON (Aihua Xu via Szehon)

Repository: hive
Updated Branches:
  refs/heads/master c3d62ad94 -> 0d36e8247


HIVE-11980 : Follow up on HIVE-11696, exception is thrown from CTAS from the table with table-level serde is Parquet while partition-level serde is JSON (Aihua Xu via Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d36e824
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d36e824
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d36e824

Branch: refs/heads/master
Commit: 0d36e82479a47dac7e55875364503881fdbc069e
Parents: c3d62ad
Author: Szehon Ho <sz...@cloudera.com>
Authored: Fri Oct 2 12:54:08 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Fri Oct 2 12:54:51 2015 -0700

----------------------------------------------------------------------
 data/files/sample2.json                         |  2 +
 .../serde/ArrayWritableObjectInspector.java     |  7 ++
 .../parquet_mixed_partition_formats2.q          | 31 ++++++
 .../parquet_mixed_partition_formats2.q.out      | 99 ++++++++++++++++++++
 4 files changed, 139 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0d36e824/data/files/sample2.json
----------------------------------------------------------------------
diff --git a/data/files/sample2.json b/data/files/sample2.json
new file mode 100644
index 0000000..4e1802f
--- /dev/null
+++ b/data/files/sample2.json
@@ -0,0 +1,2 @@
+{"id": 1, "reports": [2,3], "address": {"country": 1, "state": 1}}
+{"id": 2, "reports": [], "address": {"country": 1, "state": 2}}

http://git-wip-us.apache.org/repos/asf/hive/blob/0d36e824/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
index 6091882..ae545b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
@@ -169,6 +169,13 @@ public class ArrayWritableObjectInspector extends SettableStructObjectInspector
       return new ArrayList<Object>(Arrays.asList(arrWritable));
     }
 
+    //since setStructFieldData and create return a list, getStructFieldData should be able to
+    //handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
+    //is something else.
+    if (data instanceof List) {
+      return ((List) data);
+    }
+
     throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0d36e824/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q b/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q
new file mode 100644
index 0000000..e0b21d1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_mixed_partition_formats2.q
@@ -0,0 +1,31 @@
+add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar;
+
+CREATE TABLE parquet_table_json_partition (
+id bigint COMMENT 'from deserializer',
+address struct<country:bigint,state:bigint> COMMENT 'from deserializer',
+reports array<bigint> COMMENT 'from deserializer')
+PARTITIONED BY (
+ts string)
+ROW FORMAT SERDE
+'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+LOAD DATA LOCAL INPATH '../../data/files/sample2.json' INTO TABLE parquet_table_json_partition PARTITION(ts='20150101');
+
+SELECT * FROM parquet_table_json_partition LIMIT 100;
+
+ALTER TABLE parquet_table_json_partition
+  SET FILEFORMAT INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+                 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+                 SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe';
+
+SELECT * FROM parquet_table_json_partition LIMIT 100;
+
+CREATE TABLE new_table AS SELECT * FROM parquet_table_json_partition LIMIT 100;
+
+SELECT * FROM new_table;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/0d36e824/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out b/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out
new file mode 100644
index 0000000..c4d7197
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_mixed_partition_formats2.q.out
@@ -0,0 +1,99 @@
+PREHOOK: query: CREATE TABLE parquet_table_json_partition (
+id bigint COMMENT 'from deserializer',
+address struct<country:bigint,state:bigint> COMMENT 'from deserializer',
+reports array<bigint> COMMENT 'from deserializer')
+PARTITIONED BY (
+ts string)
+ROW FORMAT SERDE
+'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: query: CREATE TABLE parquet_table_json_partition (
+id bigint COMMENT 'from deserializer',
+address struct<country:bigint,state:bigint> COMMENT 'from deserializer',
+reports array<bigint> COMMENT 'from deserializer')
+PARTITIONED BY (
+ts string)
+ROW FORMAT SERDE
+'org.apache.hive.hcatalog.data.JsonSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_table_json_partition
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sample2.json' INTO TABLE parquet_table_json_partition PARTITION(ts='20150101')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sample2.json' INTO TABLE parquet_table_json_partition PARTITION(ts='20150101')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: Output: default@parquet_table_json_partition@ts=20150101
+PREHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+1	{"country":1,"state":1}	[2,3]	20150101
+2	{"country":1,"state":2}	[]	20150101
+PREHOOK: query: ALTER TABLE parquet_table_json_partition
+  SET FILEFORMAT INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+                 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+                 SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Output: default@parquet_table_json_partition
+POSTHOOK: query: ALTER TABLE parquet_table_json_partition
+  SET FILEFORMAT INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+                 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+                 SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Output: default@parquet_table_json_partition
+PREHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_table_json_partition LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101
+#### A masked pattern was here ####
+1	{"country":1,"state":1}	[2,3]	20150101
+2	{"country":1,"state":2}	[]	20150101
+PREHOOK: query: CREATE TABLE new_table AS SELECT * FROM parquet_table_json_partition LIMIT 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@parquet_table_json_partition
+PREHOOK: Input: default@parquet_table_json_partition@ts=20150101
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_table
+POSTHOOK: query: CREATE TABLE new_table AS SELECT * FROM parquet_table_json_partition LIMIT 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@parquet_table_json_partition
+POSTHOOK: Input: default@parquet_table_json_partition@ts=20150101
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_table
+PREHOOK: query: SELECT * FROM new_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM new_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_table
+#### A masked pattern was here ####
+2	{"country":1,"state":2}	[]	20150101
+1	{"country":1,"state":1}	[2,3]	20150101