You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/30 03:50:10 UTC
svn commit: r1635373 - in /hive/trunk: data/files/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/avro/
Author: brock
Date: Thu Oct 30 02:50:09 2014
New Revision: 1635373
URL: http://svn.apache.org/r1635373
Log:
HIVE-8577 - Cannot deserialize Avro schema with a map<string,string> with null values (Sergio Pena via Brock)
Added:
hive/trunk/data/files/map_null_schema.avro
hive/trunk/data/files/map_null_val.avro (with props)
hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q
hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out
Modified:
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
Added: hive/trunk/data/files/map_null_schema.avro
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/map_null_schema.avro?rev=1635373&view=auto
==============================================================================
--- hive/trunk/data/files/map_null_schema.avro (added)
+++ hive/trunk/data/files/map_null_schema.avro Thu Oct 30 02:50:09 2014
@@ -0,0 +1 @@
+{"type":"record","name":"dku_record_0","namespace":"com.dataiku.dss","doc":"","fields":[{"name":"avreau_col_1","type":["null",{"type":"map","values":["null","string"]}],"default":null}]}
Added: hive/trunk/data/files/map_null_val.avro
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/map_null_val.avro?rev=1635373&view=auto
==============================================================================
Binary file - no diff available.
Propchange: hive/trunk/data/files/map_null_val.avro
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q?rev=1635373&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q Thu Oct 30 02:50:09 2014
@@ -0,0 +1,14 @@
+-- These test attempts to deserialize an Avro file that contains map null values, and the file schema
+-- vs record schema have the null values in different positions
+-- i.e.
+-- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"]
+-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
+
+
+DROP TABLE IF EXISTS avro_table;
+
+CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO;
+LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table;
+SELECT * FROM avro_table;
+
+DROP TABLE avro_table;
\ No newline at end of file
Added: hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out?rev=1635373&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out Thu Oct 30 02:50:09 2014
@@ -0,0 +1,55 @@
+PREHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
+-- vs record schema have the null values in different positions
+-- i.e.
+-- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"]
+-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
+
+
+DROP TABLE IF EXISTS avro_table
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
+-- vs record schema have the null values in different positions
+-- i.e.
+-- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"]
+-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
+
+
+DROP TABLE IF EXISTS avro_table
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_table
+POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_table
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@avro_table
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@avro_table
+PREHOOK: query: SELECT * FROM avro_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_table
+#### A masked pattern was here ####
+{"key4":null,"key3":"val3"}
+{"key4":null,"key3":"val3"}
+{"key2":"val2","key1":null}
+{"key4":null,"key3":"val3"}
+{"key4":null,"key3":"val3"}
+PREHOOK: query: DROP TABLE avro_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_table
+PREHOOK: Output: default@avro_table
+POSTHOOK: query: DROP TABLE avro_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_table
+POSTHOOK: Output: default@avro_table
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1635373&r1=1635372&r2=1635373&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Thu Oct 30 02:50:09 2014
@@ -200,7 +200,7 @@ class AvroDeserializer {
// Avro requires NULLable types to be defined as unions of some type T
// and NULL. This is annoying and we're going to hide it from the user.
if(AvroSerdeUtils.isNullableType(recordSchema)) {
- return deserializeNullableUnion(datum, fileSchema, recordSchema, columnType);
+ return deserializeNullableUnion(datum, fileSchema, recordSchema);
}
switch(columnType.getCategory()) {
@@ -296,8 +296,8 @@ class AvroDeserializer {
* Extract either a null or the correct type from a Nullable type. This is
* horrible in that we rebuild the TypeInfo every time.
*/
- private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema,
- TypeInfo columnType) throws AvroSerdeException {
+ private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema)
+ throws AvroSerdeException {
int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value
Schema schema = recordSchema.getTypes().get(tag);
if (schema.getType().equals(Schema.Type.NULL)) {
@@ -306,8 +306,14 @@ class AvroDeserializer {
Schema currentFileSchema = null;
if (fileSchema != null) {
- currentFileSchema =
- fileSchema.getType() == Type.UNION ? fileSchema.getTypes().get(tag) : fileSchema;
+ if (fileSchema.getType() == Type.UNION) {
+ // The fileSchema may have the null value in a different position, so
+ // we need to get the correct tag
+ tag = GenericData.get().resolveUnion(fileSchema, datum);
+ currentFileSchema = fileSchema.getTypes().get(tag);
+ } else {
+ currentFileSchema = fileSchema;
+ }
}
return worker(datum, currentFileSchema, schema, SchemaToTypeInfo.generateTypeInfo(schema));