You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/05/30 03:34:17 UTC
svn commit: r1598450 - in /hive/trunk:
common/src/test/org/apache/hadoop/hive/common/type/ data/files/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/
ql/src/test/org/apache/hadoop/hive/...
Author: xuefu
Date: Fri May 30 01:34:16 2014
New Revision: 1598450
URL: http://svn.apache.org/r1598450
Log:
HIVE-7123: Follow-up of HIVE-6367 (reviewed by Brock)
Added:
hive/trunk/data/files/dec_comp.txt
Modified:
hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out
Modified: hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java (original)
+++ hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java Fri May 30 01:34:16 2014
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.common.type;
import java.math.BigDecimal;
+import java.math.BigInteger;
import org.junit.Assert;
import org.junit.Test;
@@ -118,4 +119,17 @@ public class TestHiveDecimal {
Assert.assertNull(dec);
}
+ @Test
+ public void testBinaryConversion() {
+ HiveDecimal dec = HiveDecimal.create("234.79");
+ int scale = 2;
+ byte[] d = dec.setScale(2).unscaledValue().toByteArray();
+ Assert.assertEquals(dec, HiveDecimal.create(new BigInteger(d), scale));
+ int prec = 5;
+ int len = (int) Math.ceil((Math.log(Math.pow(10, prec) - 1) / Math.log(2) + 1) / 8);
+ byte[] res = new byte[len];
+ System.arraycopy(d, 0, res, len-d.length, d.length); // Padding leading zeros.
+ Assert.assertEquals(dec, HiveDecimal.create(new BigInteger(res), scale));
+ }
+
}
Added: hive/trunk/data/files/dec_comp.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/dec_comp.txt?rev=1598450&view=auto
==============================================================================
--- hive/trunk/data/files/dec_comp.txt (added)
+++ hive/trunk/data/files/dec_comp.txt Fri May 30 01:34:16 2014
@@ -0,0 +1,2 @@
+3.14,6.28,7.30|k1:92.77,k2:29.39|5,9.03
+12.4,1.33,0.34|k2:2.79,k4:29.09|11,0.0314
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Fri May 30 01:34:16 2014
@@ -83,8 +83,11 @@ public class HiveSchemaConverter {
throw new UnsupportedOperationException("Void type not implemented");
} else if (typeInfo instanceof DecimalTypeInfo) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
- return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.DECIMAL).scale(decimalTypeInfo.scale()).
- precision(decimalTypeInfo.precision()).named(name);
+ int prec = decimalTypeInfo.precision();
+ int scale = decimalTypeInfo.scale();
+ int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
+ return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).
+ scale(scale).precision(prec).named(name);
} else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
} else {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Fri May 30 01:34:16 2014
@@ -37,7 +37,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
@@ -67,12 +66,21 @@ import parquet.io.api.Binary;
*
*/
public class ParquetHiveSerDe extends AbstractSerDe {
-
public static final Text MAP_KEY = new Text("key");
public static final Text MAP_VALUE = new Text("value");
public static final Text MAP = new Text("map");
public static final Text ARRAY = new Text("bag");
+ // Map precision to the number of bytes needed for binary conversion.
+ public static final int PRECISION_TO_BYTE_COUNT[] = new int[38];
+ static {
+ for (int prec = 1; prec <= 38; prec++) {
+ // Estimated number of bytes needed.
+ PRECISION_TO_BYTE_COUNT[prec - 1] = (int)
+ Math.ceil((Math.log(Math.pow(10, prec) - 1) / Math.log(2) + 1) / 8);
+ }
+ }
+
private SerDeStats stats;
private ObjectInspector objInspector;
@@ -246,7 +254,18 @@ public class ParquetHiveSerDe extends Ab
case DECIMAL:
HiveDecimal hd = (HiveDecimal)inspector.getPrimitiveJavaObject(obj);
DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
- return new BinaryWritable(Binary.fromByteArray(hd.setScale(decTypeInfo.scale()).unscaledValue().toByteArray()));
+ int prec = decTypeInfo.precision();
+ int scale = decTypeInfo.scale();
+ byte[] src = hd.setScale(scale).unscaledValue().toByteArray();
+ // Estimated number of bytes needed.
+ int bytes = PRECISION_TO_BYTE_COUNT[prec - 1];
+ if (bytes == src.length) {
+ // No padding needed.
+ return new BinaryWritable(Binary.fromByteArray(src));
+ }
+ byte[] tgt = new byte[bytes];
+ System.arraycopy(src, 0, tgt, bytes - src.length, src.length); // Padding leading zeroes.
+ return new BinaryWritable(Binary.fromByteArray(tgt));
default:
throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java Fri May 30 01:34:16 2014
@@ -20,6 +20,7 @@ import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
+import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.junit.Test;
@@ -27,6 +28,8 @@ import org.junit.Test;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.schema.OriginalType;
+import parquet.schema.Types;
+import parquet.schema.PrimitiveType.PrimitiveTypeName;
import parquet.schema.Type.Repetition;
public class TestHiveSchemaConverter {
@@ -80,7 +83,7 @@ public class TestHiveSchemaConverter {
"a",
"decimal(5,2)",
"message hive_schema {\n"
- + " optional binary a (DECIMAL(5,2));\n"
+ + " optional fixed_len_byte_array(3) a (DECIMAL(5,2));\n"
+ "}\n");
}
@@ -104,7 +107,7 @@ public class TestHiveSchemaConverter {
"message hive_schema {\n"
+ " optional group arrayCol (LIST) {\n"
+ " repeated group bag {\n"
- + " optional binary array_element (DECIMAL(5,2));\n"
+ + " optional fixed_len_byte_array(3) array_element (DECIMAL(5,2));\n"
+ " }\n"
+ " }\n"
+ "}\n");
@@ -119,7 +122,7 @@ public class TestHiveSchemaConverter {
+ " optional int32 a;\n"
+ " optional double b;\n"
+ " optional boolean c;\n"
- + " optional binary d (DECIMAL(5,2));\n"
+ + " optional fixed_len_byte_array(3) d (DECIMAL(5,2));\n"
+ " }\n"
+ "}\n");
}
@@ -146,7 +149,7 @@ public class TestHiveSchemaConverter {
+ " optional group mapCol (MAP) {\n"
+ " repeated group map (MAP_KEY_VALUE) {\n"
+ " required binary key;\n"
- + " optional binary value (DECIMAL(5,2));\n"
+ + " optional fixed_len_byte_array(3) value (DECIMAL(5,2));\n"
+ " }\n"
+ " }\n"
+ "}\n");
Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out?rev=1598450&r1=1598449&r2=1598450&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_decimal1.q.out Fri May 30 01:34:16 2014
@@ -70,9 +70,6 @@ POSTHOOK: query: SELECT * FROM parq_dec_
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parq_dec_comp
#### A masked pattern was here ####
-POSTHOOK: Lineage: parq_dec_comp.arr SIMPLE [(dec_comp)dec_comp.FieldSchema(name:arr, type:array<decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.m SIMPLE [(dec_comp)dec_comp.FieldSchema(name:m, type:map<string,decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.s SIMPLE [(dec_comp)dec_comp.FieldSchema(name:s, type:struct<i:int,d:decimal(5,2)>, comment:null), ]
[3.14,6.28,7.3] {"k2":29.39,"k1":92.77} {"i":5,"d":9.03}
[12.4,1.33,0.34] {"k4":29.09,"k2":2.79} {"i":11,"d":0.03}
PREHOOK: query: DROP TABLE dec_comp
@@ -83,9 +80,6 @@ POSTHOOK: query: DROP TABLE dec_comp
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@dec_comp
POSTHOOK: Output: default@dec_comp
-POSTHOOK: Lineage: parq_dec_comp.arr SIMPLE [(dec_comp)dec_comp.FieldSchema(name:arr, type:array<decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.m SIMPLE [(dec_comp)dec_comp.FieldSchema(name:m, type:map<string,decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.s SIMPLE [(dec_comp)dec_comp.FieldSchema(name:s, type:struct<i:int,d:decimal(5,2)>, comment:null), ]
PREHOOK: query: DROP TABLE parq_dec_comp
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@parq_dec_comp
@@ -94,6 +88,3 @@ POSTHOOK: query: DROP TABLE parq_dec_com
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@parq_dec_comp
POSTHOOK: Output: default@parq_dec_comp
-POSTHOOK: Lineage: parq_dec_comp.arr SIMPLE [(dec_comp)dec_comp.FieldSchema(name:arr, type:array<decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.m SIMPLE [(dec_comp)dec_comp.FieldSchema(name:m, type:map<string,decimal(5,2)>, comment:null), ]
-POSTHOOK: Lineage: parq_dec_comp.s SIMPLE [(dec_comp)dec_comp.FieldSchema(name:s, type:struct<i:int,d:decimal(5,2)>, comment:null), ]