You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/05/30 03:34:17 UTC

svn commit: r1598450 - in /hive/trunk: common/src/test/org/apache/hadoop/hive/common/type/ data/files/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ ql/src/test/org/apache/hadoop/hive/...

Author: xuefu
Date: Fri May 30 01:34:16 2014
New Revision: 1598450

URL: http://svn.apache.org/r1598450
Log:
HIVE-7123: Follow-up of HIVE-6367 (reviewed by Brock)

Added:
    hive/trunk/data/files/dec_comp.txt
Modified:
    hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
    hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out

Modified: hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java (original)
+++ hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java Fri May 30 01:34:16 2014
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.common.type;
 
 import java.math.BigDecimal;
+import java.math.BigInteger;
 
 import org.junit.Assert;
 import org.junit.Test;
@@ -118,4 +119,17 @@ public class TestHiveDecimal {
     Assert.assertNull(dec);
   }
 
+  @Test
+  public void testBinaryConversion() {
+    HiveDecimal dec = HiveDecimal.create("234.79");
+    int scale = 2;
+    byte[] d = dec.setScale(scale).unscaledValue().toByteArray(); // Minimal two's-complement form.
+    Assert.assertEquals(dec, HiveDecimal.create(new BigInteger(d), scale)); // Round trip, no padding.
+    int prec = 5;
+    int len = (int) Math.ceil((Math.log(Math.pow(10, prec) - 1) / Math.log(2) + 1) / 8); // 3 bytes.
+    byte[] res = new byte[len];
+    System.arraycopy(d, 0, res, len - d.length, d.length); // Left-pad with zero bytes (value >= 0).
+    Assert.assertEquals(dec, HiveDecimal.create(new BigInteger(res), scale)); // Round trip, padded.
+  }
+
 }

Added: hive/trunk/data/files/dec_comp.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/dec_comp.txt?rev=1598450&view=auto
==============================================================================
--- hive/trunk/data/files/dec_comp.txt (added)
+++ hive/trunk/data/files/dec_comp.txt Fri May 30 01:34:16 2014
@@ -0,0 +1,2 @@
+3.14,6.28,7.30|k1:92.77,k2:29.39|5,9.03
+12.4,1.33,0.34|k2:2.79,k4:29.09|11,0.0314

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Fri May 30 01:34:16 2014
@@ -83,8 +83,11 @@ public class HiveSchemaConverter {
         throw new UnsupportedOperationException("Void type not implemented");
       } else if (typeInfo instanceof DecimalTypeInfo) {
         DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
-        return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.DECIMAL).scale(decimalTypeInfo.scale()).
-            precision(decimalTypeInfo.precision()).named(name);
+        int prec = decimalTypeInfo.precision();
+        int scale = decimalTypeInfo.scale();
+        int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; // Fixed width for prec.
+        return Types.primitive(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition).length(bytes)
+            .as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); // Keeps repetition.
       } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
         throw new UnsupportedOperationException("Unknown type not implemented");
       } else {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Fri May 30 01:34:16 2014
@@ -37,7 +37,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
@@ -67,12 +66,21 @@ import parquet.io.api.Binary;
  *
  */
 public class ParquetHiveSerDe extends AbstractSerDe {
-
   public static final Text MAP_KEY = new Text("key");
   public static final Text MAP_VALUE = new Text("value");
   public static final Text MAP = new Text("map");
   public static final Text ARRAY = new Text("bag");
 
+  // Lookup table: entry [p - 1] is the byte width used to store a decimal of precision p.
+  public static final int PRECISION_TO_BYTE_COUNT[] = new int[38];
+  static {
+    for (int idx = 0; idx < PRECISION_TO_BYTE_COUNT.length; idx++) {
+      // log2 of the largest unscaled value (10^p - 1), plus one, rounded up to whole bytes.
+      double bits = Math.log(Math.pow(10, idx + 1) - 1) / Math.log(2) + 1;
+      PRECISION_TO_BYTE_COUNT[idx] = (int) Math.ceil(bits / 8);
+    }
+  }
+
   private SerDeStats stats;
   private ObjectInspector objInspector;
 
@@ -246,7 +254,29 @@ public class ParquetHiveSerDe extends Ab
     case DECIMAL:
       HiveDecimal hd = (HiveDecimal)inspector.getPrimitiveJavaObject(obj);
       DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
-      return new BinaryWritable(Binary.fromByteArray(hd.setScale(decTypeInfo.scale()).unscaledValue().toByteArray()));
+      int prec = decTypeInfo.precision();
+      int scale = decTypeInfo.scale();
+      // Big-endian two's-complement encoding of the unscaled value, in its minimal length.
+      byte[] src = hd.setScale(scale).unscaledValue().toByteArray();
+      // Fixed column width in bytes for the declared precision.
+      int bytes =  PRECISION_TO_BYTE_COUNT[prec - 1];
+      if (bytes == src.length) {
+        // No padding needed.
+        return new BinaryWritable(Binary.fromByteArray(src));
+      }
+      if (src.length > bytes) {
+        // Fail with a descriptive error instead of an IndexOutOfBoundsException from arraycopy.
+        throw new SerDeException("Decimal " + hd + " does not fit into precision " + prec);
+      }
+      byte[] tgt = new byte[bytes];
+      int padLen = bytes - src.length;
+      if (src[0] < 0) {
+        // Sign-extend: a negative value must be padded with 0xFF bytes, not zeroes,
+        // otherwise it reads back as a positive number.
+        for (int i = 0; i < padLen; i++) {
+          tgt[i] = (byte) 0xFF;
+        }
+      }
+      System.arraycopy(src, 0, tgt, padLen, src.length); // Value occupies the low-order bytes.
+      return new BinaryWritable(Binary.fromByteArray(tgt));
     default:
       throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
     }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java Fri May 30 01:34:16 2014
@@ -20,6 +20,7 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
+import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.junit.Test;
@@ -27,6 +28,8 @@ import org.junit.Test;
 import parquet.schema.MessageType;
 import parquet.schema.MessageTypeParser;
 import parquet.schema.OriginalType;
+import parquet.schema.Types;
+import parquet.schema.PrimitiveType.PrimitiveTypeName;
 import parquet.schema.Type.Repetition;
 
 public class TestHiveSchemaConverter {
@@ -80,7 +83,7 @@ public class TestHiveSchemaConverter {
             "a",
             "decimal(5,2)",
             "message hive_schema {\n"
-            + "  optional binary a (DECIMAL(5,2));\n"
+            + "  optional fixed_len_byte_array(3) a (DECIMAL(5,2));\n"
             + "}\n");
   }
 
@@ -104,7 +107,7 @@ public class TestHiveSchemaConverter {
             "message hive_schema {\n"
             + "  optional group arrayCol (LIST) {\n"
             + "    repeated group bag {\n"
-            + "      optional binary array_element (DECIMAL(5,2));\n"
+            + "      optional fixed_len_byte_array(3) array_element (DECIMAL(5,2));\n"
             + "    }\n"
             + "  }\n"
             + "}\n");
@@ -119,7 +122,7 @@ public class TestHiveSchemaConverter {
             + "    optional int32 a;\n"
             + "    optional double b;\n"
             + "    optional boolean c;\n"
-            + "    optional binary d (DECIMAL(5,2));\n"
+            + "    optional fixed_len_byte_array(3) d (DECIMAL(5,2));\n"
             + "  }\n"
             + "}\n");
   }
@@ -146,7 +149,7 @@ public class TestHiveSchemaConverter {
             + "  optional group mapCol (MAP) {\n"
             + "    repeated group map (MAP_KEY_VALUE) {\n"
             + "      required binary key;\n"
-            + "      optional binary value (DECIMAL(5,2));\n"
+            + "      optional fixed_len_byte_array(3) value (DECIMAL(5,2));\n"
             + "    }\n"
             + "  }\n"
             + "}\n");

Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out Fri May 30 01:34:16 2014
@@ -70,9 +70,6 @@ POSTHOOK: query: SELECT * FROM parq_dec_
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parq_dec_comp
 #### A masked pattern was here ####
-POSTHOOK: Lineage: parq_dec_comp.arr SIMPLE [(dec_comp)dec_comp.FieldSchema(name:arr, type:array<decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.m SIMPLE [(dec_comp)dec_comp.FieldSchema(name:m, type:map<string,decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.s SIMPLE [(dec_comp)dec_comp.FieldSchema(name:s, type:struct<i:int,d:decimal(5,2)>, comment:null), ]
 [3.14,6.28,7.3]	{"k2":29.39,"k1":92.77}	{"i":5,"d":9.03}
 [12.4,1.33,0.34]	{"k4":29.09,"k2":2.79}	{"i":11,"d":0.03}
 PREHOOK: query: DROP TABLE dec_comp
@@ -83,9 +80,6 @@ POSTHOOK: query: DROP TABLE dec_comp
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@dec_comp
 POSTHOOK: Output: default@dec_comp
-POSTHOOK: Lineage: parq_dec_comp.arr SIMPLE [(dec_comp)dec_comp.FieldSchema(name:arr, type:array<decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.m SIMPLE [(dec_comp)dec_comp.FieldSchema(name:m, type:map<string,decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.s SIMPLE [(dec_comp)dec_comp.FieldSchema(name:s, type:struct<i:int,d:decimal(5,2)>, comment:null), ]
 PREHOOK: query: DROP TABLE parq_dec_comp
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@parq_dec_comp
@@ -94,6 +88,3 @@ POSTHOOK: query: DROP TABLE parq_dec_com
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@parq_dec_comp
 POSTHOOK: Output: default@parq_dec_comp
-POSTHOOK: Lineage: parq_dec_comp.arr SIMPLE [(dec_comp)dec_comp.FieldSchema(name:arr, type:array<decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.m SIMPLE [(dec_comp)dec_comp.FieldSchema(name:m, type:map<string,decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.s SIMPLE [(dec_comp)dec_comp.FieldSchema(name:s, type:struct<i:int,d:decimal(5,2)>, comment:null), ]