You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2017/06/07 22:22:33 UTC
parquet-mr git commit: Parquet-884: Add support for Decimal datatype
to Parquet-Pig record reader
Repository: parquet-mr
Updated Branches:
refs/heads/master 9491d7a61 -> 9d58b6a83
Parquet-884: Add support for Decimal datatype to Parquet-Pig record reader
Adds support to the Pig record reader for converting the Parquet Decimal datatype. The conversion is based on the Scala code in the Apache Spark project, which provides a similar function for Spark SQL.
Author: EllenKletscher <el...@capitalone.com>
Closes #404 from EllenKletscher/master and squashes the following commits:
7714738 [EllenKletscher] add comment for precision check
50c75c8 [EllenKletscher] remove check for primitiveType null
08d4dbb [EllenKletscher] PARQUET-884: Add missing AL header
57c4d72 [EllenKletscher] PARQUET-884: Add missing AL header
ea61267 [EllenKletscher] PARQUET-884: add support for decimal type to pig reader
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9d58b6a8
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9d58b6a8
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9d58b6a8
Branch: refs/heads/master
Commit: 9d58b6a83aa79dcad01c3bcc2ec0a7db74ba83b1
Parents: 9491d7a
Author: EllenKletscher <el...@capitalone.com>
Authored: Wed Jun 7 15:22:28 2017 -0700
Committer: Julien Le Dem <ju...@apache.org>
Committed: Wed Jun 7 15:22:28 2017 -0700
----------------------------------------------------------------------
.../apache/parquet/pig/PigSchemaConverter.java | 8 +-
.../parquet/pig/convert/DecimalUtils.java | 65 ++++++++++++++++
.../parquet/pig/convert/TupleConverter.java | 27 +++++++
.../apache/parquet/pig/TestDecimalUtils.java | 79 ++++++++++++++++++++
4 files changed, 177 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
index c9eb0ba..e560e42 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
@@ -244,8 +244,12 @@ public class PigSchemaConverter {
@Override
public FieldSchema convertFIXED_LEN_BYTE_ARRAY(
- PrimitiveTypeName primitiveTypeName) throws FrontendException {
- return new FieldSchema(fieldName, null, DataType.BYTEARRAY);
+ PrimitiveTypeName primitiveTypeName) throws FrontendException {
+ if (originalType == OriginalType.DECIMAL) {
+ return new FieldSchema(fieldName, null, DataType.BIGDECIMAL);
+ } else {
+ return new FieldSchema(fieldName, null, DataType.BYTEARRAY);
+ }
}
@Override
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java
new file mode 100644
index 0000000..f850332
--- /dev/null
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.pig.convert;
+
+import java.nio.ByteBuffer;
+import java.math.BigInteger;
+import java.math.BigDecimal;
+import static java.lang.Math.pow;
+
+import org.apache.parquet.io.api.Binary;
+
+/**
+ * Converts Parquet DECIMAL values into Java {@link BigDecimal} instances for Pig.
+ * Based on the Apache Spark ParquetRowConverter.scala.
+ */
+public class DecimalUtils {
+
+  /** Largest decimal precision whose unscaled value always fits in a signed 64-bit long. */
+  private static final int MAX_LONG_PRECISION = 18;
+
+  /** Number of bytes in a long; longer payloads cannot be decoded on the long fast path. */
+  private static final int MAX_LONG_BYTES = 8;
+
+  /**
+   * Decodes a big-endian two's-complement unscaled integer and applies the declared scale.
+   *
+   * <p>The result is exact: no floating-point arithmetic is involved, and the returned
+   * value always has scale {@code scale} (for example unscaled 100 with scale 2 yields
+   * {@code 1.00}).
+   *
+   * @param value big-endian two's-complement bytes of the unscaled value
+   * @param precision declared decimal precision; only used to pick the decoding path
+   * @param scale number of digits to the right of the decimal point
+   * @return the decimal {@code unscaled * 10^-scale}
+   */
+  public static BigDecimal binaryToDecimal(Binary value, int precision, int scale) {
+    ByteBuffer buffer = value.toByteBuffer();
+    int start = buffer.position();
+    int end = buffer.limit();
+    int length = end - start;
+
+    // Fall back to BigInteger when the declared precision may exceed a long, or when the
+    // payload is physically wider than a long regardless of the declared precision.
+    if (precision > MAX_LONG_PRECISION || length > MAX_LONG_BYTES) {
+      return new BigDecimal(new BigInteger(value.getBytes()), scale);
+    }
+
+    // Seed with all ones when the most significant byte is negative so the value is
+    // sign-extended as the remaining bytes are shifted in. An empty payload decodes to 0.
+    long unscaled = (length > 0 && buffer.get(start) < 0) ? -1L : 0L;
+    // Absolute gets work for heap, direct and read-only buffers alike, unlike array().
+    for (int i = start; i < end; i++) {
+      unscaled = (unscaled << 8) | (buffer.get(i) & 0xff);
+    }
+    return BigDecimal.valueOf(unscaled, scale);
+  }
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
index 3887332..1c7ab6c 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
@@ -21,6 +21,7 @@ package org.apache.parquet.pig.convert;
import static java.lang.Math.max;
import java.util.ArrayList;
import java.util.List;
+import java.math.BigDecimal;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
@@ -39,9 +40,11 @@ import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.pig.TupleConversionException;
+import org.apache.parquet.pig.convert.DecimalUtils;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.DecimalMetadata;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
@@ -140,6 +143,8 @@ public class TupleConverter extends GroupConverter {
return new FieldDoubleConverter(parent);
case DataType.LONG:
return new FieldLongConverter(parent);
+ case DataType.BIGDECIMAL:
+ return new FieldBigDecimalConverter(type, parent);
default:
throw new TupleConversionException("unsupported pig type: " + pigField);
}
@@ -530,6 +535,28 @@ public class TupleConverter extends GroupConverter {
}
/**
+ * Handles the Parquet DECIMAL type by converting each binary value to a {@code BigDecimal}.
+ *
+ */
+  static final class FieldBigDecimalConverter extends PrimitiveConverter {
+    /** Receives every converted value. */
+    private final ParentValueContainer parent;
+    /** Parquet type of the field; its decimal metadata supplies precision and scale. */
+    private final Type primitiveType;
+
+    /**
+     * @param primitiveType the Parquet type of the decimal field being read
+     * @param parent the container each converted {@code BigDecimal} is added to
+     */
+    public FieldBigDecimalConverter(Type primitiveType, ParentValueContainer parent) {
+      this.primitiveType = primitiveType;
+      this.parent = parent;
+    }
+
+    /**
+     * Converts one binary decimal value using the field's precision and scale and
+     * hands the result to the parent container.
+     */
+    @Override
+    public final void addBinary(Binary value) {
+      DecimalMetadata decimalMetadata = primitiveType.asPrimitiveType().getDecimalMetadata();
+      parent.add(DecimalUtils.binaryToDecimal(
+          value, decimalMetadata.getPrecision(), decimalMetadata.getScale()));
+    }
+  }
+
+
+ /**
* Converts groups into bags
*
* @author Julien Le Dem
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java
new file mode 100644
index 0000000..3b4afe8
--- /dev/null
+++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.pig;
+
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+
+import java.math.BigDecimal;
+import static java.lang.Math.abs;
+import java.nio.ByteBuffer;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.pig.convert.DecimalUtils;
+
+public class TestDecimalUtils {
+
+  /**
+   * Sends the unscaled bytes of {@code originalValue} through
+   * {@link DecimalUtils#binaryToDecimal} with the given precision and scale, and checks
+   * the numeric value of the result against {@code stringValue}.
+   *
+   * <p>The comparison uses {@code compareTo}, so values that differ only in their
+   * representation scale (for example 0 and 0.0) are treated as equal. Only the unscaled
+   * bytes of {@code originalValue} are used; its own scale is ignored in favour of
+   * {@code scale}.
+   */
+  private void testDecimalConversion(BigDecimal originalValue, int precision, int scale, String stringValue) {
+    Binary unscaledBytes = Binary.fromByteArray(originalValue.unscaledValue().toByteArray());
+    BigDecimal convertedValue = DecimalUtils.binaryToDecimal(unscaledBytes, precision, scale);
+    assertEquals("expected " + stringValue + " but converted to " + convertedValue,
+        0, new BigDecimal(stringValue).compareTo(convertedValue));
+  }
+
+  private void testDecimalConversion(double value, int precision, int scale, String stringValue) {
+    testDecimalConversion(new BigDecimal(Double.toString(value)), precision, scale, stringValue);
+  }
+
+  private void testDecimalConversion(int value, int precision, int scale, String stringValue) {
+    testDecimalConversion(new BigDecimal(Integer.toString(value)), precision, scale, stringValue);
+  }
+
+  private void testDecimalConversion(long value, int precision, int scale, String stringValue) {
+    testDecimalConversion(new BigDecimal(Long.toString(value)), precision, scale, stringValue);
+  }
+
+  @Test
+  public void testBinaryToDecimal() throws Exception {
+    // Known issue: doubles of the form Nx10^M cannot be tested through these helpers.
+    // Only BigDecimal.unscaledValue() is passed on, so the exponent is dropped and the
+    // value always converts to Nx10 regardless of M.
+    // Known issue: any double with precision > 17 breaks in this test but not in
+    // functional testing (presumably because the double-to-string round trip cannot
+    // carry more digits).
+
+    // Test LONG
+    testDecimalConversion(Long.MAX_VALUE,19,0,"9223372036854775807");
+    testDecimalConversion(Long.MIN_VALUE,19,0,"-9223372036854775808");
+    // Precision 1 rather than 0: a decimal precision must be positive.
+    testDecimalConversion(0L,1,0,"0");
+
+    // Test INTEGER
+    testDecimalConversion(Integer.MAX_VALUE,10,0,"2147483647");
+    testDecimalConversion(Integer.MIN_VALUE,10,0,"-2147483648");
+    testDecimalConversion(0,1,0,"0");
+
+    // Test DOUBLE
+    testDecimalConversion(12345678912345678d,17,0,"12345678912345678");
+    testDecimalConversion(123456789123456.78,17,2,"123456789123456.78");
+    testDecimalConversion(0.12345678912345678,17,17,"0.12345678912345678");
+    testDecimalConversion(-0.000102,6,6,"-0.000102");
+  }
+}