You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2016/07/29 15:26:32 UTC

hive git commit: HIVE-14294: HiveSchemaConverter for Parquet doesn't translate TINYINT and SMALLINT into proper Parquet types (Gabor Szadovszky, reviewed by Aihua Xu)

Repository: hive
Updated Branches:
  refs/heads/master ce154012f -> c6aa5552c


HIVE-14294: HiveSchemaConverter for Parquet doesn't translate TINYINT and SMALLINT into proper Parquet types (Gabor Szadovszky, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6aa5552
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6aa5552
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6aa5552

Branch: refs/heads/master
Commit: c6aa5552c1bf5c0e0ecf06e338f5724fb6b93a58
Parents: ce15401
Author: Gabor Szadovszky <ga...@cloudera.com>
Authored: Fri Jul 29 10:25:41 2016 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Fri Jul 29 10:25:41 2016 -0500

----------------------------------------------------------------------
 .../io/parquet/convert/HiveSchemaConverter.java | 10 ++++--
 .../ql/io/parquet/TestHiveSchemaConverter.java  | 36 +++++++++++++++-----
 2 files changed, 34 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c6aa5552/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 40f6256..255f30c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -62,10 +62,14 @@ public class HiveSchemaConverter {
       if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
         return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
           .named(name);
-      } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
-          typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
-          typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
+      } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
         return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
+      } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
+        return Types.primitive(PrimitiveTypeName.INT32, repetition)
+            .as(OriginalType.INT_16).named(name);
+      } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
+        return Types.primitive(PrimitiveTypeName.INT32, repetition)
+            .as(OriginalType.INT_8).named(name);
       } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
         return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
       } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {

http://git-wip-us.apache.org/repos/asf/hive/blob/c6aa5552/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
index 589b5b5..256031e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
@@ -20,17 +20,14 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
-import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.junit.Test;
-
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 import org.apache.parquet.schema.OriginalType;
-import org.apache.parquet.schema.Types;
-import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Type.Repetition;
+import org.junit.Test;
 
 public class TestHiveSchemaConverter {
 
@@ -63,17 +60,38 @@ public class TestHiveSchemaConverter {
     final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
     final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
     assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
+
+    // The original types must be checked manually because PrimitiveType.equals does not compare them
+    List<Type> expectedFields = expectedMT.getFields();
+    List<Type> actualFields = messageTypeFound.getFields();
+    for (int i = 0, n = expectedFields.size(); i < n; ++i) {
+      OriginalType exp = expectedFields.get(i).getOriginalType();
+      OriginalType act = actualFields.get(i).getOriginalType();
+      assertEquals("Original types of the field do not match", exp, act);
+    }
   }
 
   @Test
   public void testSimpleType() throws Exception {
     testConversion(
-            "a,b,c",
-            "int,double,boolean",
+            "a,b,c,d",
+            "int,bigint,double,boolean",
             "message hive_schema {\n"
             + "  optional int32 a;\n"
-            + "  optional double b;\n"
-            + "  optional boolean c;\n"
+            + "  optional int64 b;\n"
+            + "  optional double c;\n"
+            + "  optional boolean d;\n"
+            + "}\n");
+  }
+
+  @Test
+  public void testSpecialIntType() throws Exception {
+    testConversion(
+            "a,b",
+            "tinyint,smallint",
+            "message hive_schema {\n"
+            + "  optional int32 a (INT_8);\n"
+            + "  optional int32 b (INT_16);\n"
             + "}\n");
   }