You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/07 00:23:56 UTC

svn commit: r1629779 - in /hive/trunk/serde/src: java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java

Author: brock
Date: Mon Oct  6 22:23:55 2014
New Revision: 1629779

URL: http://svn.apache.org/r1629779
Log:
HIVE-5865 - AvroDeserializer incorrectly assumes keys to Maps will always be of type 'org.apache.avro.util.Utf8' (Ben Roling via Brock)

Modified:
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1629779&r1=1629778&r2=1629779&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Mon Oct  6 22:23:55 2014
@@ -40,7 +40,6 @@ import org.apache.avro.io.BinaryDecoder;
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.DecoderFactory;
 import org.apache.avro.io.EncoderFactory;
-import org.apache.avro.util.Utf8;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -370,10 +369,10 @@ class AvroDeserializer {
     // Avro only allows maps with Strings for keys, so we only have to worry
     // about deserializing the values
     Map<String, Object> map = new HashMap<String, Object>();
-    Map<Utf8, Object> mapDatum = (Map)datum;
+    Map<CharSequence, Object> mapDatum = (Map)datum;
     Schema valueSchema = mapSchema.getValueType();
     TypeInfo valueTypeInfo = columnType.getMapValueTypeInfo();
-    for (Utf8 key : mapDatum.keySet()) {
+    for (CharSequence key : mapDatum.keySet()) {
       Object value = mapDatum.get(key);
       map.put(key.toString(), worker(value, fileSchema == null ? null : fileSchema.getValueType(),
           valueSchema, valueTypeInfo));

Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java?rev=1629779&r1=1629778&r2=1629779&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java Mon Oct  6 22:23:55 2014
@@ -475,6 +475,67 @@ public class TestAvroDeserializer {
     assertTrue(theMap2.containsKey("mu"));
     assertEquals(null, theMap2.get("mu"));
   }
+  
+  @Test
+  public void canDeserializeMapsWithJavaLangStringKeys() throws IOException, SerDeException {
+    // HIVE-5865: map keys may be java.lang.String rather than Utf8, e.g. when the schema sets
+    // avro.java.string=String (see http://stackoverflow.com/a/19868919/312944 for why).
+    String schemaString = "{\n" +
+        "  \"namespace\": \"testing\",\n" +
+        "  \"name\": \"oneMap\",\n" +
+        "  \"type\": \"record\",\n" +
+        "  \"fields\": [\n" +
+        "    {\n" +
+        "      \"name\":\"aMap\",\n" +
+        "      \"type\":{\"type\":\"map\",\n" +
+        "      \"avro.java.string\":\"String\",\n" +
+        "      \"values\":\"long\"}\n" +
+        "\t}\n" +
+        "  ]\n" +
+        "}";
+    Schema s = AvroSerdeUtils.getSchemaFor(schemaString);
+    GenericData.Record record = new GenericData.Record(s);
+
+    Map<String, Long> m = new Hashtable<String, Long>(); // keys are plain java.lang.String
+    m.put("one", 1L);
+    m.put("two", 2L);
+    m.put("three", 3L);
+
+    record.put("aMap", m);
+    assertTrue(GENERIC_DATA.validate(s, record));
+    System.out.println("record = " + record);
+
+    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
+
+    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
+
+    AvroDeserializer de = new AvroDeserializer();
+
+    ArrayList<Object> row = (ArrayList<Object>)de.deserialize(aoig.getColumnNames(),
+            aoig.getColumnTypes(), garw, s);
+    assertEquals(1, row.size());
+    Object theMapObject = row.get(0);
+    assertTrue(theMapObject instanceof Map);
+    Map<?, ?> theMap = (Map<?, ?>)theMapObject;
+
+    // Verify the deserialized row directly: every entry must be reachable by its String key
+    assertEquals(1L, theMap.get("one"));
+    assertEquals(2L, theMap.get("two"));
+    assertEquals(3L, theMap.get("three"));
+
+    // Verify that the generated object inspector exposes the same values for the map column
+    StandardStructObjectInspector oi =
+            (StandardStructObjectInspector)aoig.getObjectInspector();
+
+    List<Object> z = oi.getStructFieldsDataAsList(row);
+    assertEquals(1, z.size());
+    StructField fieldRef = oi.getStructFieldRef("amap"); // lower-case lookup of field "aMap"
+
+    Map<?, ?> theMap2 = (Map<?, ?>)oi.getStructFieldData(row, fieldRef);
+    assertEquals(1L, theMap2.get("one"));
+    assertEquals(2L, theMap2.get("two"));
+    assertEquals(3L, theMap2.get("three"));
+  }
 
   private void verifyNullableType(GenericData.Record record, Schema s, String fieldName,
                                   String expected) throws SerDeException, IOException {