You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/30 03:50:10 UTC

svn commit: r1635373 - in /hive/trunk: data/files/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/org/apache/hadoop/hive/serde2/avro/

Author: brock
Date: Thu Oct 30 02:50:09 2014
New Revision: 1635373

URL: http://svn.apache.org/r1635373
Log:
HIVE-8577 - Cannot deserialize Avro schema with a map<string,string> with null values (Sergio Pena via Brock)

Added:
    hive/trunk/data/files/map_null_schema.avro
    hive/trunk/data/files/map_null_val.avro   (with props)
    hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q
    hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out
Modified:
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java

Added: hive/trunk/data/files/map_null_schema.avro
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/map_null_schema.avro?rev=1635373&view=auto
==============================================================================
--- hive/trunk/data/files/map_null_schema.avro (added)
+++ hive/trunk/data/files/map_null_schema.avro Thu Oct 30 02:50:09 2014
@@ -0,0 +1 @@
+{"type":"record","name":"dku_record_0","namespace":"com.dataiku.dss","doc":"","fields":[{"name":"avreau_col_1","type":["null",{"type":"map","values":["null","string"]}],"default":null}]}

Added: hive/trunk/data/files/map_null_val.avro
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/map_null_val.avro?rev=1635373&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hive/trunk/data/files/map_null_val.avro
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q?rev=1635373&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/avro_deserialize_map_null.q Thu Oct 30 02:50:09 2014
@@ -0,0 +1,14 @@
+-- These test attempts to deserialize an Avro file that contains map null values, and the file schema
+-- vs record schema have the null values in different positions
+-- i.e.
+-- fileSchema   = [{ "type" : "map", "values" : ["string","null"]}, "null"]
+-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
+
+
+DROP TABLE IF EXISTS avro_table;
+
+CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO;
+LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table;
+SELECT * FROM avro_table;
+
+DROP TABLE avro_table;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out?rev=1635373&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out Thu Oct 30 02:50:09 2014
@@ -0,0 +1,55 @@
+PREHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
+-- vs record schema have the null values in different positions
+-- i.e.
+-- fileSchema   = [{ "type" : "map", "values" : ["string","null"]}, "null"]
+-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
+
+
+DROP TABLE IF EXISTS avro_table
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
+-- vs record schema have the null values in different positions
+-- i.e.
+-- fileSchema   = [{ "type" : "map", "values" : ["string","null"]}, "null"]
+-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
+
+
+DROP TABLE IF EXISTS avro_table
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_table
+POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_table
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@avro_table
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@avro_table
+PREHOOK: query: SELECT * FROM avro_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_table
+#### A masked pattern was here ####
+{"key4":null,"key3":"val3"}
+{"key4":null,"key3":"val3"}
+{"key2":"val2","key1":null}
+{"key4":null,"key3":"val3"}
+{"key4":null,"key3":"val3"}
+PREHOOK: query: DROP TABLE avro_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_table
+PREHOOK: Output: default@avro_table
+POSTHOOK: query: DROP TABLE avro_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_table
+POSTHOOK: Output: default@avro_table

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1635373&r1=1635372&r2=1635373&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Thu Oct 30 02:50:09 2014
@@ -200,7 +200,7 @@ class AvroDeserializer {
     // Avro requires NULLable types to be defined as unions of some type T
     // and NULL.  This is annoying and we're going to hide it from the user.
     if(AvroSerdeUtils.isNullableType(recordSchema)) {
-      return deserializeNullableUnion(datum, fileSchema, recordSchema, columnType);
+      return deserializeNullableUnion(datum, fileSchema, recordSchema);
     }
 
     switch(columnType.getCategory()) {
@@ -296,8 +296,8 @@ class AvroDeserializer {
    * Extract either a null or the correct type from a Nullable type.  This is
    * horrible in that we rebuild the TypeInfo every time.
    */
-  private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema,
-                                          TypeInfo columnType) throws AvroSerdeException {
+  private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema)
+                                            throws AvroSerdeException {
     int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value
     Schema schema = recordSchema.getTypes().get(tag);
     if (schema.getType().equals(Schema.Type.NULL)) {
@@ -306,8 +306,14 @@ class AvroDeserializer {
 
     Schema currentFileSchema = null;
     if (fileSchema != null) {
-       currentFileSchema =
-           fileSchema.getType() == Type.UNION ? fileSchema.getTypes().get(tag) : fileSchema;
+      if (fileSchema.getType() == Type.UNION) {
+        // The fileSchema union may list its null branch at a different position
+        // than recordSchema, so resolve the tag again against fileSchema itself
+        tag = GenericData.get().resolveUnion(fileSchema, datum);
+        currentFileSchema = fileSchema.getTypes().get(tag);
+      } else {
+        currentFileSchema = fileSchema;
+      }
     }
     return worker(datum, currentFileSchema, schema, SchemaToTypeInfo.generateTypeInfo(schema));