You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/06/12 19:38:50 UTC
svn commit: r1602245 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/io/parquet/convert/
java/org/apache/hadoop/hive/ql/io/parquet/serde/
java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ java/org/apache/hadoop/hive/ql/io/parquet/write/ test/org/apache/hadoop/hive/ql/io/parquet/
Author: xuefu
Date: Thu Jun 12 17:38:50 2014
New Revision: 1602245
URL: http://svn.apache.org/r1602245
Log:
HIVE-7022: Replace BinaryWritable with BytesWritable in Parquet serde (reviewed by Brock)
Removed:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BinaryWritable.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Thu Jun 12 17:38:50 2014
@@ -25,7 +25,6 @@ import java.util.Map;
import org.apache.hadoop.hive.common.type.Decimal128;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -39,6 +38,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -391,7 +391,7 @@ public class VectorColumnAssignFactory {
assignNull(destIndex);
}
else {
- BinaryWritable bw = (BinaryWritable) val;
+ BytesWritable bw = (BytesWritable) val;
byte[] bytes = bw.getBytes();
assignBytes(bytes, 0, bytes.length, destIndex);
}
@@ -491,7 +491,7 @@ public class VectorColumnAssignFactory {
vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DOUBLE);
} else if (writables[i] instanceof Text) {
vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.STRING);
- } else if (writables[i] instanceof BinaryWritable) {
+ } else if (writables[i] instanceof BytesWritable) {
vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
} else if (writables[i] instanceof TimestampWritable) {
vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Thu Jun 12 17:38:50 2014
@@ -17,10 +17,10 @@ import java.math.BigDecimal;
import java.util.ArrayList;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -99,10 +99,10 @@ public enum ETypeConverter {
EBINARY_CONVERTER(Binary.class) {
@Override
Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
- return new BinaryConverter<BinaryWritable>(type, parent, index) {
+ return new BinaryConverter<BytesWritable>(type, parent, index) {
@Override
- protected BinaryWritable convert(Binary binary) {
- return new BinaryWritable(binary);
+ protected BytesWritable convert(Binary binary) {
+ return new BytesWritable(binary.getBytes());
}
};
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Thu Jun 12 17:38:50 2014
@@ -13,6 +13,7 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.serde;
+import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -23,7 +24,6 @@ import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -52,14 +52,13 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-import parquet.io.api.Binary;
-
/**
*
* A ParquetHiveSerDe for Hive (with the deprecated package mapred)
@@ -250,7 +249,12 @@ public class ParquetHiveSerDe extends Ab
case SHORT:
return new ShortWritable((short) ((ShortObjectInspector) inspector).get(obj));
case STRING:
- return new BinaryWritable(Binary.fromString(((StringObjectInspector) inspector).getPrimitiveJavaObject(obj)));
+ String v = ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
+ try {
+ return new BytesWritable(v.getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new SerDeException("Failed to encode string in UTF-8", e);
+ }
case DECIMAL:
HiveDecimal hd = (HiveDecimal)inspector.getPrimitiveJavaObject(obj);
DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
@@ -261,7 +265,7 @@ public class ParquetHiveSerDe extends Ab
int bytes = PRECISION_TO_BYTE_COUNT[prec - 1];
if (bytes == src.length) {
// No padding needed.
- return new BinaryWritable(Binary.fromByteArray(src));
+ return new BytesWritable(src);
}
byte[] tgt = new byte[bytes];
if ( hd.signum() == -1) {
@@ -271,7 +275,7 @@ public class ParquetHiveSerDe extends Ab
}
}
System.arraycopy(src, 0, tgt, bytes - src.length, src.length); // Padding leading zeroes/ones.
- return new BinaryWritable(Binary.fromByteArray(tgt));
+ return new BytesWritable(tgt);
default:
throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java Thu Jun 12 17:38:50 2014
@@ -13,13 +13,14 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
+
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
-import parquet.io.api.Binary;
-
/**
* The ParquetStringInspector inspects a BytesWritable to give a Text or String.
*
@@ -36,8 +37,8 @@ public class ParquetStringInspector exte
return null;
}
- if (o instanceof BinaryWritable) {
- return new Text(((BinaryWritable) o).getBytes());
+ if (o instanceof BytesWritable) {
+ return new Text(((BytesWritable) o).getBytes());
}
if (o instanceof Text) {
@@ -57,8 +58,12 @@ public class ParquetStringInspector exte
return null;
}
- if (o instanceof BinaryWritable) {
- return ((BinaryWritable) o).getString();
+ if (o instanceof BytesWritable) {
+ try {
+ return Text.decode(((BytesWritable) o).getBytes());
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException("Failed to decode string", e);
+ }
}
if (o instanceof Text) {
@@ -74,12 +79,16 @@ public class ParquetStringInspector exte
@Override
public Object set(final Object o, final Text text) {
- return new BinaryWritable(text == null ? null : Binary.fromByteArray(text.getBytes()));
+ return new BytesWritable(text == null ? null : text.getBytes());
}
@Override
public Object set(final Object o, final String string) {
- return new BinaryWritable(string == null ? null : Binary.fromString(string));
+ try {
+ return new BytesWritable(string == null ? null : string.getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("Failed to encode string in UTF-8", e);
+ }
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Thu Jun 12 17:38:50 2014
@@ -13,19 +13,20 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.write;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import parquet.io.ParquetEncodingException;
+import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
import parquet.schema.GroupType;
import parquet.schema.Type;
@@ -146,8 +147,8 @@ public class DataWritableWriter {
recordConsumer.addInteger(((ByteWritable) value).get());
} else if (value instanceof HiveDecimalWritable) {
throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
- } else if (value instanceof BinaryWritable) {
- recordConsumer.addBinary(((BinaryWritable) value).getBinary());
+ } else if (value instanceof BytesWritable) {
+ recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes())));
} else {
throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java Thu Jun 12 17:38:50 2014
@@ -19,7 +19,6 @@ import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
@@ -27,8 +26,10 @@ import org.apache.hadoop.hive.serde2.io.
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import parquet.io.api.Binary;
@@ -53,13 +54,13 @@ public class TestParquetSerDe extends Te
arr[2] = new IntWritable(789);
arr[3] = new LongWritable(1000l);
arr[4] = new DoubleWritable((double) 5.3);
- arr[5] = new BinaryWritable(Binary.fromString("hive and hadoop and parquet. Big family."));
+ arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
final Writable[] mapContainer = new Writable[1];
final Writable[] map = new Writable[3];
for (int i = 0; i < 3; ++i) {
final Writable[] pair = new Writable[2];
- pair[0] = new BinaryWritable(Binary.fromString("key_" + i));
+ pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
pair[1] = new IntWritable(i);
map[i] = new ArrayWritable(Writable.class, pair);
}
@@ -69,7 +70,7 @@ public class TestParquetSerDe extends Te
final Writable[] arrayContainer = new Writable[1];
final Writable[] array = new Writable[5];
for (int i = 0; i < 5; ++i) {
- array[i] = new BinaryWritable(Binary.fromString("elem_" + i));
+ array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
}
arrayContainer[0] = new ArrayWritable(Writable.class, array);
arr[7] = new ArrayWritable(Writable.class, arrayContainer);