Posted to commits@hive.apache.org by xu...@apache.org on 2014/06/12 19:38:50 UTC

svn commit: r1602245 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/io/parquet/convert/ java/org/apache/hadoop/hive/ql/io/parquet/serde/ java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ jav...

Author: xuefu
Date: Thu Jun 12 17:38:50 2014
New Revision: 1602245

URL: http://svn.apache.org/r1602245
Log:
HIVE-7022: Replace BinaryWritable with BytesWritable in Parquet serde (reviewed by Brock)

Removed:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/writable/BinaryWritable.java
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
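
The substitution is mechanical throughout: every site that wrapped bytes in the Hive-private BinaryWritable now uses Hadoop's standard BytesWritable instead. A minimal sketch of the resulting round trip (illustrative only, not part of the commit, built from the parquet.io.api.Binary and org.apache.hadoop.io types that appear in the diffs below):

    import org.apache.hadoop.io.BytesWritable;
    import parquet.io.api.Binary;

    public class BinaryRoundTrip {
      public static void main(String[] args) {
        // Read side: a Parquet Binary value becomes a plain Hadoop BytesWritable.
        BytesWritable bw = new BytesWritable(Binary.fromString("hive").getBytes());
        // Write side: the bytes are wrapped back into a Binary for the RecordConsumer.
        Binary out = Binary.fromByteArray(bw.getBytes());
        System.out.println(out.toStringUsingUTF8()); // prints "hive"
      }
    }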

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Thu Jun 12 17:38:50 2014
@@ -25,7 +25,6 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -39,6 +38,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -391,7 +391,7 @@ public class VectorColumnAssignFactory {
               assignNull(destIndex);
             }
             else {
-              BinaryWritable bw = (BinaryWritable) val;
+              BytesWritable bw = (BytesWritable) val;
               byte[] bytes = bw.getBytes();
               assignBytes(bytes, 0, bytes.length, destIndex);
             }
@@ -491,7 +491,7 @@ public class VectorColumnAssignFactory {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DOUBLE);
       } else if (writables[i] instanceof Text) {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.STRING);
-      } else if (writables[i] instanceof BinaryWritable) {
+      } else if (writables[i] instanceof BytesWritable) {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
       } else if (writables[i] instanceof TimestampWritable) {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
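
One contract difference worth noting here (an observation about BytesWritable, not a change made by this patch): getBytes() returns the backing buffer, which is only guaranteed valid up to getLength() once a writable has been reused and resized, so the bytes.length above matches the logical length only for freshly constructed values. A defensive copy, using a hypothetical helper name:

    import java.util.Arrays;
    import org.apache.hadoop.io.BytesWritable;

    // Hypothetical helper: trim a possibly over-allocated backing buffer
    // down to the logical value. getBytes() can be longer than getLength()
    // after the writable has been reused via set()/setSize().
    static byte[] validBytes(BytesWritable bw) {
      return Arrays.copyOf(bw.getBytes(), bw.getLength());
    }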

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Thu Jun 12 17:38:50 2014
@@ -17,10 +17,10 @@ import java.math.BigDecimal;
 
 import java.util.ArrayList;
 
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -99,10 +99,10 @@ public enum ETypeConverter {
   EBINARY_CONVERTER(Binary.class) {
     @Override
     Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
-      return new BinaryConverter<BinaryWritable>(type, parent, index) {
+      return new BinaryConverter<BytesWritable>(type, parent, index) {
         @Override
-        protected BinaryWritable convert(Binary binary) {
-          return new BinaryWritable(binary);
+        protected BytesWritable convert(Binary binary) {
+          return new BytesWritable(binary.getBytes());
         }
       };
     }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Thu Jun 12 17:38:50 2014
@@ -13,6 +13,7 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.serde;
 
+import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -23,7 +24,6 @@ import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -52,14 +52,13 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-import parquet.io.api.Binary;
-
 /**
  *
  * A ParquetHiveSerDe for Hive (with the deprecated package mapred)
@@ -250,7 +249,12 @@ public class ParquetHiveSerDe extends Ab
     case SHORT:
       return new ShortWritable((short) ((ShortObjectInspector) inspector).get(obj));
     case STRING:
-      return new BinaryWritable(Binary.fromString(((StringObjectInspector) inspector).getPrimitiveJavaObject(obj)));
+      String v = ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
+      try {
+        return new BytesWritable(v.getBytes("UTF-8"));
+      } catch (UnsupportedEncodingException e) {
+        throw new SerDeException("Failed to encode string in UTF-8", e);
+      }
     case DECIMAL:
       HiveDecimal hd = (HiveDecimal)inspector.getPrimitiveJavaObject(obj);
       DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) inspector.getTypeInfo();
@@ -261,7 +265,7 @@ public class ParquetHiveSerDe extends Ab
       int bytes =  PRECISION_TO_BYTE_COUNT[prec - 1];
       if (bytes == src.length) {
         // No padding needed.
-        return new BinaryWritable(Binary.fromByteArray(src));
+        return new BytesWritable(src);
       }
       byte[] tgt = new byte[bytes];
       if ( hd.signum() == -1) {
@@ -271,7 +275,7 @@ public class ParquetHiveSerDe extends Ab
         }
       }
       System.arraycopy(src, 0, tgt, bytes - src.length, src.length); // Padding leading zeroes/ones.
-      return new BinaryWritable(Binary.fromByteArray(tgt));
+      return new BytesWritable(tgt);
     default:
       throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
     }
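
The DECIMAL branch sign-extends the unscaled two's-complement bytes to the fixed width implied by the declared precision, padding with 0xFF for negative values and 0x00 otherwise. A worked example of the same arithmetic (a standalone sketch, assuming PRECISION_TO_BYTE_COUNT maps precision 3 to 2 bytes):

    import java.math.BigInteger;
    import java.util.Arrays;

    public class DecimalPadding {
      public static void main(String[] args) {
        // Widen the one-byte two's-complement encoding of -1 to the two
        // bytes that precision 3 would occupy.
        byte[] src = BigInteger.valueOf(-1).toByteArray(); // { (byte) 0xFF }
        byte[] tgt = new byte[2];
        Arrays.fill(tgt, (byte) 0xFF);                     // negative: pad with ones
        System.arraycopy(src, 0, tgt, tgt.length - src.length, src.length);
        System.out.println(Arrays.toString(tgt));          // [-1, -1], i.e. 0xFF 0xFF
        // For +1 the pad bytes stay 0x00 and tgt would be { 0x00, 0x01 }.
      }
    }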

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/primitive/ParquetStringInspector.java Thu Jun 12 17:38:50 2014
@@ -13,13 +13,14 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.serde.primitive;
 
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
+
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
-import parquet.io.api.Binary;
-
 /**
  * The ParquetStringInspector inspects a BinaryWritable to give a Text or String.
  *
@@ -36,8 +37,8 @@ public class ParquetStringInspector exte
       return null;
     }
 
-    if (o instanceof BinaryWritable) {
-      return new Text(((BinaryWritable) o).getBytes());
+    if (o instanceof BytesWritable) {
+      return new Text(((BytesWritable) o).getBytes());
     }
 
     if (o instanceof Text) {
@@ -57,8 +58,12 @@ public class ParquetStringInspector exte
       return null;
     }
 
-    if (o instanceof BinaryWritable) {
-      return ((BinaryWritable) o).getString();
+    if (o instanceof BytesWritable) {
+      try {
+        return Text.decode(((BytesWritable) o).getBytes());
+      } catch (CharacterCodingException e) {
+        throw new RuntimeException("Failed to decode string", e);
+      }
     }
 
     if (o instanceof Text) {
@@ -74,12 +79,16 @@ public class ParquetStringInspector exte
 
   @Override
   public Object set(final Object o, final Text text) {
-    return new BinaryWritable(text == null ? null : Binary.fromByteArray(text.getBytes()));
+    return new BytesWritable(text == null ? null : text.getBytes());
   }
 
   @Override
   public Object set(final Object o, final String string) {
-    return new BinaryWritable(string == null ? null : Binary.fromString(string));
+    try {
+      return new BytesWritable(string == null ? null : string.getBytes("UTF-8"));
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException("Failed to encode string in UTF-8", e);
+    }
   }
 
   @Override
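
Two notes on the inspector changes. First, the class javadoc above still says the inspector inspects a BinaryWritable; presumably that comment was left for a follow-up. Second, Text.decode validates its input and throws the checked CharacterCodingException on malformed UTF-8, which is why the new getPrimitiveJavaObject wraps it; the getBytes("UTF-8") path likewise throws the checked UnsupportedEncodingException, presumably because StandardCharsets (Java 7) is unavailable while the codebase still targets Java 6. Also, Text.getBytes() in set(Object, Text) has the same backing-buffer caveat noted for BytesWritable earlier. A minimal round trip:

    import org.apache.hadoop.io.Text;

    public class Utf8RoundTrip {
      public static void main(String[] args) throws Exception {
        byte[] utf8 = "d\u00e9j\u00e0 vu".getBytes("UTF-8"); // encode
        String back = Text.decode(utf8);                     // validating decode
        System.out.println(back);                            // prints "déjà vu"
      }
    }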

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Thu Jun 12 17:38:50 2014
@@ -13,19 +13,20 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.write;
 
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Writable;
 
 import parquet.io.ParquetEncodingException;
+import parquet.io.api.Binary;
 import parquet.io.api.RecordConsumer;
 import parquet.schema.GroupType;
 import parquet.schema.Type;
@@ -146,8 +147,8 @@ public class DataWritableWriter {
       recordConsumer.addInteger(((ByteWritable) value).get());
     } else if (value instanceof HiveDecimalWritable) {
       throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
-    } else if (value instanceof BinaryWritable) {
-      recordConsumer.addBinary(((BinaryWritable) value).getBinary());
+    } else if (value instanceof BytesWritable) {
+      recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes())));
     } else {
       throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
     }
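
The backing-buffer caveat noted earlier applies on this write path as well: Binary.fromByteArray wraps the whole array returned by getBytes(), which can include trailing slack for a reused writable. If the bundled Parquet version has an offset/length overload (an assumption, not verified against the jar here), a length-aware variant, using the hunk's own variables, would be:

    // Length-aware wrap; assumes Binary.fromByteArray(byte[], int, int)
    // exists in the bundled Parquet version.
    recordConsumer.addBinary(
        Binary.fromByteArray(bw.getBytes(), 0, bw.getLength()));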

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java?rev=1602245&r1=1602244&r2=1602245&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java Thu Jun 12 17:38:50 2014
@@ -19,7 +19,6 @@ import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
-import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
@@ -27,8 +26,10 @@ import org.apache.hadoop.hive.serde2.io.
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
 import parquet.io.api.Binary;
@@ -53,13 +54,13 @@ public class TestParquetSerDe extends Te
       arr[2] = new IntWritable(789);
       arr[3] = new LongWritable(1000l);
       arr[4] = new DoubleWritable((double) 5.3);
-      arr[5] = new BinaryWritable(Binary.fromString("hive and hadoop and parquet. Big family."));
+      arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
 
       final Writable[] mapContainer = new Writable[1];
       final Writable[] map = new Writable[3];
       for (int i = 0; i < 3; ++i) {
         final Writable[] pair = new Writable[2];
-        pair[0] = new BinaryWritable(Binary.fromString("key_" + i));
+        pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
         pair[1] = new IntWritable(i);
         map[i] = new ArrayWritable(Writable.class, pair);
       }
@@ -69,7 +70,7 @@ public class TestParquetSerDe extends Te
       final Writable[] arrayContainer = new Writable[1];
       final Writable[] array = new Writable[5];
       for (int i = 0; i < 5; ++i) {
-        array[i] = new BinaryWritable(Binary.fromString("elem_" + i));
+        array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
       }
       arrayContainer[0] = new ArrayWritable(Writable.class, array);
       arr[7] = new ArrayWritable(Writable.class, arrayContainer);