You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/04/02 23:55:45 UTC

svn commit: r1584201 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java

Author: xuefu
Date: Wed Apr  2 21:55:44 2014
New Revision: 1584201

URL: http://svn.apache.org/r1584201
Log:
HIVE-6783: Incompatible schema for maps between parquet-hive and parquet-pig (Tongjie via Xuefu)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1584201&r1=1584200&r2=1584201&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Wed Apr  2 21:55:44 2014
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
+import parquet.schema.ConversionPatterns;
 import parquet.schema.GroupType;
 import parquet.schema.MessageType;
 import parquet.schema.OriginalType;
@@ -118,8 +119,7 @@ public class HiveSchemaConverter {
         typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
     final Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(),
         typeInfo.getMapValueTypeInfo());
-    return listWrapper(name, OriginalType.MAP_KEY_VALUE,
-        new GroupType(Repetition.REPEATED, ParquetHiveSerDe.MAP.toString(), keyType, valueType));
+    return ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType);
   }
 
   private static GroupType listWrapper(final String name, final OriginalType originalType,

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java?rev=1584201&r1=1584200&r2=1584201&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java Wed Apr  2 21:55:44 2014
@@ -26,6 +26,8 @@ import org.junit.Test;
 
 import parquet.schema.MessageType;
 import parquet.schema.MessageTypeParser;
+import parquet.schema.OriginalType;
+import parquet.schema.Type.Repetition;
 
 public class TestHiveSchemaConverter {
 
@@ -111,4 +113,26 @@ public class TestHiveSchemaConverter {
             + "  }\n"
             + "}\n");
   }
+
+  @Test
+  public void testMapOriginalType() throws Exception {
+    final String hiveColumnTypes = "map<string,string>";
+    final String hiveColumnNames = "mapCol";
+    final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
+    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
+    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
+    // Top level: the converted schema must hold exactly one field, an OPTIONAL group named "mapCol" whose original type is MAP.
+    assertEquals(1, messageTypeFound.getFieldCount());
+    parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
+    assertEquals("mapCol",topLevel.getName());
+    assertEquals(OriginalType.MAP, topLevel.getOriginalType());
+    assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());
+
+    assertEquals(1, topLevel.asGroupType().getFieldCount());
+    parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
+    // Second level: "mapCol" must contain exactly one REPEATED field named "map" whose original type is MAP_KEY_VALUE.
+    assertEquals("map", secondLevel.getName());
+    assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
+    assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
+  }
 }