You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2010/01/21 10:52:48 UTC
svn commit: r901625 [2/6] - in /hadoop/hive/trunk: ./
serde/src/java/org/apache/hadoop/hive/serde2/
serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/
serde/src/java/org/apache/hadoop/hive/serde2/columnar/
serde/src/java/org/apache/hadoop/hiv...
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java Thu Jan 21 09:52:44 2010
@@ -39,9 +39,8 @@
int start = 0;
int length = 0;
byte[] bytes = null;
-
+
LazyDecompressionCallback lazyDecompressObj;
-
/**
* Create a zero-size bytes.
@@ -78,7 +77,7 @@
start = offset;
length = len;
}
-
+
/**
* Create a BytesRefWritable referenced to one section of the given bytes. The
* argument <tt>lazyDecompressData</tt> refers to a LazyDecompressionCallback
@@ -86,7 +85,8 @@
* uncompressed bytes of <tt>lazyDecompressData</tt>. Use <tt>offset</tt> and
* <tt>len</tt> after uncompressing the data.
*/
- public BytesRefWritable(LazyDecompressionCallback lazyDecompressData, int offset, int len) {
+ public BytesRefWritable(LazyDecompressionCallback lazyDecompressData,
+ int offset, int len) {
lazyDecompressObj = lazyDecompressData;
start = offset;
length = len;
@@ -102,7 +102,7 @@
* Returns a copy of the underlying bytes referenced by this instance.
*
* @return a new copied byte array
- * @throws IOException
+ * @throws IOException
*/
public byte[] getBytesCopy() throws IOException {
lazyDecompress();
@@ -113,7 +113,8 @@
/**
* Returns the underlying bytes.
- * @throws IOException
+ *
+ * @throws IOException
*/
public byte[] getData() throws IOException {
lazyDecompress();
@@ -132,7 +133,7 @@
length = len;
lazyDecompressObj = null;
}
-
+
/**
* readFields() will corrupt the array. So use the set method whenever
* possible.
@@ -176,11 +177,13 @@
}
/** {@inheritDoc} */
+ @Override
public int hashCode() {
return super.hashCode();
}
/** {@inheritDoc} */
+ @Override
public String toString() {
StringBuffer sb = new StringBuffer(3 * length);
for (int idx = start; idx < length; idx++) {
@@ -201,22 +204,26 @@
/** {@inheritDoc} */
@Override
public int compareTo(BytesRefWritable other) {
- if (other == null)
+ if (other == null) {
throw new IllegalArgumentException("Argument can not be null.");
- if (this == other)
+ }
+ if (this == other) {
return 0;
+ }
try {
- return WritableComparator.compareBytes(getData(), start, getLength(), other
- .getData(), other.start, other.getLength());
+ return WritableComparator.compareBytes(getData(), start, getLength(),
+ other.getData(), other.start, other.getLength());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
/** {@inheritDoc} */
+ @Override
public boolean equals(Object right_obj) {
- if (right_obj == null || !(right_obj instanceof BytesRefWritable))
+ if (right_obj == null || !(right_obj instanceof BytesRefWritable)) {
return false;
+ }
return compareTo((BytesRefWritable) right_obj) == 0;
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java Thu Jan 21 09:52:44 2010
@@ -26,8 +26,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -55,6 +55,7 @@
// We need some initial values in case user don't call initialize()
private ObjectInspector cachedObjectInspector;
+ @Override
public String toString() {
return getClass().toString()
+ "["
@@ -89,14 +90,14 @@
// ColumnarObject uses same ObjectInpector as LazyStruct
cachedObjectInspector = LazyFactory.createColumnarStructInspector(
serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams
- .getSeparators(), serdeParams.getNullSequence(), serdeParams.isEscaped(),
- serdeParams.getEscapeChar());
+ .getSeparators(), serdeParams.getNullSequence(), serdeParams
+ .isEscaped(), serdeParams.getEscapeChar());
+
+ java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils
+ .getReadColumnIDs(job);
-
- java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(job);
-
cachedLazyStruct = new ColumnarStruct(cachedObjectInspector, notSkipIDs);
-
+
int size = serdeParams.getColumnTypes().size();
field = new BytesRefWritable[size];
for (int i = 0; i < size; i++) {
@@ -204,17 +205,17 @@
&& (declaredFields == null || declaredFields.get(i)
.getFieldObjectInspector().getCategory().equals(
Category.PRIMITIVE))) {
- LazySimpleSerDe.serialize(serializeStream,
- SerDeUtils.getJSONString(f, foi),
+ LazySimpleSerDe.serialize(serializeStream, SerDeUtils.getJSONString(
+ f, foi),
PrimitiveObjectInspectorFactory.javaStringObjectInspector,
serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
- serdeParams.isEscaped(), serdeParams.getEscapeChar(),
- serdeParams.getNeedsEscape());
+ serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
+ .getNeedsEscape());
} else {
- LazySimpleSerDe.serialize(serializeStream, f, foi,
- serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
- serdeParams.isEscaped(), serdeParams.getEscapeChar(),
- serdeParams.getNeedsEscape());
+ LazySimpleSerDe.serialize(serializeStream, f, foi, serdeParams
+ .getSeparators(), 1, serdeParams.getNullSequence(), serdeParams
+ .isEscaped(), serdeParams.getEscapeChar(), serdeParams
+ .getNeedsEscape());
}
field[i].set(serializeStream.getData(), count, serializeStream
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java Thu Jan 21 09:52:44 2010
@@ -20,9 +20,10 @@
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyObject;
@@ -31,8 +32,6 @@
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
/**
* ColumnarStruct is different from LazyStruct in that ColumnarStruct's field
@@ -47,11 +46,11 @@
* The fields of the struct.
*/
LazyObject[] fields;
-
+
private static final Log LOG = LogFactory.getLog(ColumnarStruct.class);
-
- int[] prjColIDs = null; // list of projected column IDs
-
+
+ int[] prjColIDs = null; // list of projected column IDs
+
/**
* Construct a ColumnarStruct object with the TypeInfo. It creates the first
* level object at the first place
@@ -62,7 +61,7 @@
public ColumnarStruct(ObjectInspector oi) {
this(oi, null);
}
-
+
/**
* Construct a ColumnarStruct object with the TypeInfo. It creates the first
* level object at the first place
@@ -70,44 +69,50 @@
* @param oi
* the ObjectInspector representing the type of this LazyStruct.
* @param notSkippedColumnIDs
- * the column ids that should not be skipped
+ * the column ids that should not be skipped
*/
- public ColumnarStruct(ObjectInspector oi, ArrayList<Integer> notSkippedColumnIDs) {
- List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+ public ColumnarStruct(ObjectInspector oi,
+ ArrayList<Integer> notSkippedColumnIDs) {
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
+ .getAllStructFieldRefs();
int num = fieldRefs.size();
fields = new LazyObject[num];
cachedByteArrayRef = new ByteArrayRef[num];
rawBytesField = new BytesRefWritable[num];
fieldSkipped = new boolean[num];
inited = new boolean[num];
-
- //if no columns is set to be skipped, add all columns in 'notSkippedColumnIDs'
- if (notSkippedColumnIDs == null || notSkippedColumnIDs.size() == 0) {
- for (int i = 0; i < num; i++)
- notSkippedColumnIDs.add(i);
- }
-
+
+ // if no columns is set to be skipped, add all columns in
+ // 'notSkippedColumnIDs'
+ if (notSkippedColumnIDs == null || notSkippedColumnIDs.size() == 0) {
+ for (int i = 0; i < num; i++) {
+ notSkippedColumnIDs.add(i);
+ }
+ }
+
for (int i = 0; i < num; i++) {
- fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
+ fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i)
+ .getFieldObjectInspector());
cachedByteArrayRef[i] = new ByteArrayRef();
- if(!notSkippedColumnIDs.contains(i)){
- fieldSkipped[i] = true;
- inited[i] = true;
- } else
- inited[i] = false;
- }
-
- // maintain a list of non-NULL column IDs
- int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
- .size();
- prjColIDs = new int[min];
- for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
- int readCol = notSkippedColumnIDs.get(i).intValue();
- if (readCol < num) {
- prjColIDs[index] = readCol;
- index++;
- }
- }
+ if (!notSkippedColumnIDs.contains(i)) {
+ fieldSkipped[i] = true;
+ inited[i] = true;
+ } else {
+ inited[i] = false;
+ }
+ }
+
+ // maintain a list of non-NULL column IDs
+ int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
+ .size();
+ prjColIDs = new int[min];
+ for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
+ int readCol = notSkippedColumnIDs.get(i).intValue();
+ if (readCol < num) {
+ prjColIDs[index] = readCol;
+ index++;
+ }
+ }
}
/**
@@ -152,8 +157,9 @@
* @return The value of the field
*/
protected Object uncheckedGetField(int fieldID, Text nullSequence) {
- if (fieldSkipped[fieldID])
+ if (fieldSkipped[fieldID]) {
return null;
+ }
if (!inited[fieldID]) {
BytesRefWritable passedInField = rawBytesField[fieldID];
try {
@@ -165,35 +171,36 @@
.getStart(), passedInField.getLength());
inited[fieldID] = true;
}
-
+
byte[] data = cachedByteArrayRef[fieldID].getData();
int fieldLen = rawBytesField[fieldID].length;
-
+
if (fieldLen == nullSequence.getLength()
- && LazyUtils.compare(data, rawBytesField[fieldID].getStart(),
- fieldLen, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
+ && LazyUtils.compare(data, rawBytesField[fieldID].getStart(), fieldLen,
+ nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
return null;
}
return fields[fieldID].getObject();
}
-
- /* ============================ [PERF] ===================================
- * This function is called for every row. Setting up the selected/projected
- * columns at the first call, and don't do that for the following calls.
- * Ideally this should be done in the constructor where we don't need to
- * branch in the function for each row.
- * =========================================================================
+
+ /*
+ * ============================ [PERF] ===================================
+ * This function is called for every row. Setting up the selected/projected
+ * columns at the first call, and don't do that for the following calls.
+ * Ideally this should be done in the constructor where we don't need to
+ * branch in the function for each row.
+ * =========================================================================
*/
public void init(BytesRefArrayWritable cols) {
- for (int i = 0; i < prjColIDs.length; ++i ) {
+ for (int i = 0; i < prjColIDs.length; ++i) {
int fieldIndex = prjColIDs[i];
- if(fieldIndex < cols.size()){
- rawBytesField[fieldIndex] = cols.unCheckedGet(fieldIndex);
- inited[fieldIndex] = false;
+ if (fieldIndex < cols.size()) {
+ rawBytesField[fieldIndex] = cols.unCheckedGet(fieldIndex);
+ inited[fieldIndex] = false;
} else {
- // select columns that actually do not exist in the file.
- fieldSkipped[fieldIndex] = true;
+ // select columns that actually do not exist in the file.
+ fieldSkipped[fieldIndex] = true;
}
}
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java Thu Jan 21 09:52:44 2010
@@ -20,9 +20,8 @@
import java.io.IOException;
-
/**
- * Used to call back lazy decompression process.
+ * Used to call back lazy decompression process.
*
* @see #BytesRefWritable
*/
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java Thu Jan 21 09:52:44 2010
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hive.serde2.io;
-
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
@@ -47,33 +46,36 @@
public void set(byte value) {
this.value = value;
}
-
+
public byte get() {
return value;
}
-
+
/** Compares two ByteWritables. */
public int compareTo(Object o) {
- int thisValue = this.value;
+ int thisValue = value;
int thatValue = ((ByteWritable) o).value;
return thisValue - thatValue;
}
+ @Override
public boolean equals(Object o) {
if (o == null || o.getClass() != ByteWritable.class) {
return false;
}
- return get() == ((ByteWritable)o).get();
+ return get() == ((ByteWritable) o).get();
}
-
+
+ @Override
public int hashCode() {
return value;
}
-
+
+ @Override
public String toString() {
return String.valueOf(get());
}
-
+
/** A Comparator optimized for BytesWritable. */
public static class Comparator extends WritableComparator {
public Comparator() {
@@ -83,6 +85,7 @@
/**
* Compare the buffers in serialized form.
*/
+ @Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
int a1 = b1[s1];
int a2 = b2[s2];
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java Thu Jan 21 09:52:44 2010
@@ -52,30 +52,37 @@
out.writeDouble(value);
}
- public void set(double value) { this.value = value; }
+ public void set(double value) {
+ this.value = value;
+ }
- public double get() { return value; }
+ public double get() {
+ return value;
+ }
/**
* Returns true iff <code>o</code> is a DoubleWritable with the same value.
*/
+ @Override
public boolean equals(Object o) {
if (!(o instanceof DoubleWritable)) {
return false;
}
- DoubleWritable other = (DoubleWritable)o;
- return this.value == other.value;
+ DoubleWritable other = (DoubleWritable) o;
+ return value == other.value;
}
+ @Override
public int hashCode() {
- return (int)Double.doubleToLongBits(value);
+ return (int) Double.doubleToLongBits(value);
}
public int compareTo(Object o) {
- DoubleWritable other = (DoubleWritable)o;
+ DoubleWritable other = (DoubleWritable) o;
return (value < other.value ? -1 : (value == other.value ? 0 : 1));
}
+ @Override
public String toString() {
return Double.toString(value);
}
@@ -86,18 +93,16 @@
super(DoubleWritable.class);
}
- public int compare(byte[] b1, int s1, int l1,
- byte[] b2, int s2, int l2) {
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
double thisValue = readDouble(b1, s1);
double thatValue = readDouble(b2, s2);
return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
}
}
- static { // register this comparator
+ static { // register this comparator
WritableComparator.define(DoubleWritable.class, new Comparator());
}
}
-
-
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java Thu Jan 21 09:52:44 2010
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hive.serde2.io;
-
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
@@ -47,28 +46,31 @@
public void set(short value) {
this.value = value;
}
-
+
public short get() {
return value;
}
-
+
+ @Override
public boolean equals(Object o) {
if (o == null || o.getClass() != ShortWritable.class) {
return false;
}
- return get() == ((ShortWritable)o).get();
+ return get() == ((ShortWritable) o).get();
}
+ @Override
public int hashCode() {
return value;
}
-
+
+ @Override
public String toString() {
return String.valueOf(get());
}
-
+
public int compareTo(Object o) {
- int thisValue = this.value;
+ int thisValue = value;
int thatValue = ((ShortWritable) o).value;
return thisValue - thatValue;
}
@@ -82,9 +84,10 @@
/**
* Compare the buffers in serialized form.
*/
+ @Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int a1 = (short)(readUnsignedShort(b1, s1));
- int a2 = (short)(readUnsignedShort(b2, s2));
+ int a1 = (short) (readUnsignedShort(b1, s1));
+ int a2 = (short) (readUnsignedShort(b2, s2));
return a1 - a2;
}
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java Thu Jan 21 09:52:44 2010
@@ -18,21 +18,24 @@
package org.apache.hadoop.hive.serde2.thrift;
-import org.apache.hadoop.conf.Configuration;
import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
import org.apache.thrift.TException;
/**
- * An interface for TProtocols that need to have properties passed in to
- * initialize them. e.g., separators for TCTLSeparatedProtocol.
- * If there was a regex like deserializer, the regex could be passed in
- * in this manner.
+ * An interface for TProtocols that need to have properties passed in to
+ * initialize them. e.g., separators for TCTLSeparatedProtocol. If there was a
+ * regex like deserializer, the regex could be passed in in this manner.
*/
public interface ConfigurableTProtocol {
/**
* Initialize the TProtocol
- * @param conf System properties
- * @param tbl table properties
+ *
+ * @param conf
+ * System properties
+ * @param tbl
+ * table properties
* @throws TException
*/
public void initialize(Configuration conf, Properties tbl) throws TException;
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java Thu Jan 21 09:52:44 2010
@@ -16,61 +16,65 @@
* limitations under the License.
*/
-
package org.apache.hadoop.hive.serde2.thrift;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.Properties;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
-import org.apache.thrift.TException;
-import org.apache.thrift.transport.*;
-import org.apache.thrift.protocol.*;
-import java.util.*;
-import java.io.*;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.Text;
-
-import java.util.Properties;
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TMessage;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TProtocolFactory;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TStruct;
+import org.apache.thrift.transport.TTransport;
/**
* An implementation of the Thrift Protocol for binary sortable records.
*
- * The data format:
- * NULL: a single byte \0
- * NON-NULL Primitives: ALWAYS prepend a single byte \1, and then:
- * Boolean: FALSE = \1, TRUE = \2
- * Byte: flip the sign-bit to make sure negative comes before positive
- * Short: flip the sign-bit to make sure negative comes before positive
- * Int: flip the sign-bit to make sure negative comes before positive
- * Long: flip the sign-bit to make sure negative comes before positive
- * Double: flip the sign-bit for positive double, and all bits for negative double values
- * String: NULL-terminated UTF-8 string, with NULL escaped to \1 \1, and \1 escaped to \1 \2
- * NON-NULL Complex Types:
- * Struct: first the single byte \1, and then one field by one field.
- * List: size stored as Int (see above), then one element by one element.
- * Map: size stored as Int (see above), then one key by one value, and then the next pair and so on.
- * Binary: size stored as Int (see above), then the binary data in its original form
- *
- * Note that the relative order of list/map/binary will be based on the size first (and elements one by one if
- * the sizes are equal).
+ * The data format: NULL: a single byte \0 NON-NULL Primitives: ALWAYS prepend a
+ * single byte \1, and then: Boolean: FALSE = \1, TRUE = \2 Byte: flip the
+ * sign-bit to make sure negative comes before positive Short: flip the sign-bit
+ * to make sure negative comes before positive Int: flip the sign-bit to make
+ * sure negative comes before positive Long: flip the sign-bit to make sure
+ * negative comes before positive Double: flip the sign-bit for positive double,
+ * and all bits for negative double values String: NULL-terminated UTF-8 string,
+ * with NULL escaped to \1 \1, and \1 escaped to \1 \2 NON-NULL Complex Types:
+ * Struct: first the single byte \1, and then one field by one field. List: size
+ * stored as Int (see above), then one element by one element. Map: size stored
+ * as Int (see above), then one key by one value, and then the next pair and so
+ * on. Binary: size stored as Int (see above), then the binary data in its
+ * original form
*
- * This protocol takes an additional parameter SERIALIZATION_SORT_ORDER which is a string containing only "+" and "-".
- * The length of the string should equal to the number of fields in the top-level struct for serialization.
- * "+" means the field should be sorted ascendingly, and "-" means descendingly. The sub fields in the same top-level
- * field will have the same sort order.
+ * Note that the relative order of list/map/binary will be based on the size
+ * first (and elements one by one if the sizes are equal).
*
- * This is not thrift compliant in that it doesn't write out field ids
- * so things cannot actually be versioned.
+ * This protocol takes an additional parameter SERIALIZATION_SORT_ORDER which is
+ * a string containing only "+" and "-". The length of the string should equal
+ * to the number of fields in the top-level struct for serialization. "+" means
+ * the field should be sorted ascendingly, and "-" means descendingly. The sub
+ * fields in the same top-level field will have the same sort order.
+ *
+ * This is not thrift compliant in that it doesn't write out field ids so things
+ * cannot actually be versioned.
*/
-public class TBinarySortableProtocol extends TProtocol implements ConfigurableTProtocol,
- WriteNullsProtocol, WriteTextProtocol {
+public class TBinarySortableProtocol extends TProtocol implements
+ ConfigurableTProtocol, WriteNullsProtocol, WriteTextProtocol {
+
+ final static Log LOG = LogFactory.getLog(TBinarySortableProtocol.class
+ .getName());
- final static Log LOG = LogFactory.getLog(TBinarySortableProtocol.class.getName());
+ static byte ORDERED_TYPE = (byte) -1;
- static byte ORDERED_TYPE = (byte)-1;
-
/**
* Factory for TBinarySortableProtocol objects
*/
@@ -87,75 +91,89 @@
}
/**
- * The stack level of the current field. Top-level fields have a stackLevel value of 1.
- * Each nested struct/list/map will increase the stackLevel value by 1.
+ * The stack level of the current field. Top-level fields have a stackLevel
+ * value of 1. Each nested struct/list/map will increase the stackLevel value
+ * by 1.
*/
int stackLevel;
/**
- * The field ID in the top level struct. This is used to determine whether this field
- * should be sorted ascendingly or descendingly.
+ * The field ID in the top level struct. This is used to determine whether
+ * this field should be sorted ascendingly or descendingly.
*/
int topLevelStructFieldID;
/**
- * A string that consists of only "+" and "-". It should have the same length as the number
- * of fields in the top level struct. "+" means the corresponding field is sorted ascendingly
- * and "-" means the corresponding field is sorted descendingly.
+ * A string that consists of only "+" and "-". It should have the same length
+ * as the number of fields in the top level struct. "+" means the
+ * corresponding field is sorted ascendingly and "-" means the corresponding
+ * field is sorted descendingly.
*/
String sortOrder;
/**
- * Whether the current field is sorted ascendingly. Always equals to
+ * Whether the current field is sorted ascendingly. Always equals to
* sortOrder.charAt(topLevelStructFieldID) != '-'
*/
- boolean ascending;
-
+ boolean ascending;
+
public void initialize(Configuration conf, Properties tbl) throws TException {
sortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
if (sortOrder == null) {
sortOrder = "";
}
- for(int i=0; i<sortOrder.length(); i++) {
- char c = sortOrder.charAt(i);
+ for (int i = 0; i < sortOrder.length(); i++) {
+ char c = sortOrder.charAt(i);
if (c != '+' && c != '-') {
- throw new TException(Constants.SERIALIZATION_SORT_ORDER + " should be a string consists of only '+' and '-'!");
+ throw new TException(Constants.SERIALIZATION_SORT_ORDER
+ + " should be a string consists of only '+' and '-'!");
}
}
LOG.info("Sort order is \"" + sortOrder + "\"");
}
+ @Override
public void writeMessageBegin(TMessage message) throws TException {
}
+ @Override
public void writeMessageEnd() throws TException {
}
+ @Override
public void writeStructBegin(TStruct struct) throws TException {
stackLevel++;
if (stackLevel == 1) {
topLevelStructFieldID = 0;
- ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-');
+ ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+ .charAt(topLevelStructFieldID) != '-');
} else {
writeRawBytes(nonNullByte, 0, 1);
- // If the struct is null and level > 1, DynamicSerDe will call writeNull();
- }
+ // If the struct is null and level > 1, DynamicSerDe will call
+ // writeNull();
+ }
}
+ @Override
public void writeStructEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
+ @Override
public void writeFieldBegin(TField field) throws TException {
}
+ @Override
public void writeFieldEnd() throws TException {
if (stackLevel == 1) {
- topLevelStructFieldID ++;
- ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-');
+ topLevelStructFieldID++;
+ ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+ .charAt(topLevelStructFieldID) != '-');
}
}
+ @Override
public void writeFieldStop() {
}
+ @Override
public void writeMapBegin(TMap map) throws TException {
stackLevel++;
if (map == null) {
@@ -165,10 +183,12 @@
}
}
+ @Override
public void writeMapEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
+ @Override
public void writeListBegin(TList list) throws TException {
stackLevel++;
if (list == null) {
@@ -178,10 +198,12 @@
}
}
+ @Override
public void writeListEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
-
+
+ @Override
public void writeSetBegin(TSet set) throws TException {
stackLevel++;
if (set == null) {
@@ -191,14 +213,17 @@
}
}
+ @Override
public void writeSetEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
byte[] rawBytesBuffer;
+
// This method takes care of bit-flipping for descending order
// Declare this method as final for performance reasons
- final private void writeRawBytes(byte[] bytes, int begin, int length) throws TException {
+ final private void writeRawBytes(byte[] bytes, int begin, int length)
+ throws TException {
if (ascending) {
trans_.write(bytes, begin, length);
} else {
@@ -206,97 +231,109 @@
if (rawBytesBuffer == null || rawBytesBuffer.length < bytes.length) {
rawBytesBuffer = new byte[bytes.length];
}
- for(int i=begin; i<begin+length; i++) {
- rawBytesBuffer[i] = (byte)(~bytes[i]);
+ for (int i = begin; i < begin + length; i++) {
+ rawBytesBuffer[i] = (byte) (~bytes[i]);
}
trans_.write(rawBytesBuffer, begin, length);
}
}
-
- private byte [] bout = new byte[1];
+
+ private final byte[] bout = new byte[1];
+
+ @Override
public void writeBool(boolean b) throws TException {
- bout[0] = (b ? (byte)2 : (byte)1);
+ bout[0] = (b ? (byte) 2 : (byte) 1);
writeRawBytes(bout, 0, 1);
}
+ @Override
public void writeByte(byte b) throws TException {
writeRawBytes(nonNullByte, 0, 1);
- // Make sure negative numbers comes before positive numbers
- bout[0] = (byte) (b ^ 0x80);
+ // Make sure negative numbers comes before positive numbers
+ bout[0] = (byte) (b ^ 0x80);
writeRawBytes(bout, 0, 1);
}
- private byte[] i16out = new byte[2];
+ private final byte[] i16out = new byte[2];
+
+ @Override
public void writeI16(short i16) throws TException {
- i16out[0] = (byte)(0xff & ((i16 >> 8) ^ 0x80));
- i16out[1] = (byte)(0xff & (i16));
+ i16out[0] = (byte) (0xff & ((i16 >> 8) ^ 0x80));
+ i16out[1] = (byte) (0xff & (i16));
writeRawBytes(nonNullByte, 0, 1);
writeRawBytes(i16out, 0, 2);
}
- private byte[] i32out = new byte[4];
+ private final byte[] i32out = new byte[4];
+
+ @Override
public void writeI32(int i32) throws TException {
- i32out[0] = (byte)(0xff & ((i32 >> 24) ^ 0x80));
- i32out[1] = (byte)(0xff & (i32 >> 16));
- i32out[2] = (byte)(0xff & (i32 >> 8));
- i32out[3] = (byte)(0xff & (i32));
+ i32out[0] = (byte) (0xff & ((i32 >> 24) ^ 0x80));
+ i32out[1] = (byte) (0xff & (i32 >> 16));
+ i32out[2] = (byte) (0xff & (i32 >> 8));
+ i32out[3] = (byte) (0xff & (i32));
writeRawBytes(nonNullByte, 0, 1);
writeRawBytes(i32out, 0, 4);
}
- private byte[] i64out = new byte[8];
+ private final byte[] i64out = new byte[8];
+
+ @Override
public void writeI64(long i64) throws TException {
- i64out[0] = (byte)(0xff & ((i64 >> 56) ^ 0x80));
- i64out[1] = (byte)(0xff & (i64 >> 48));
- i64out[2] = (byte)(0xff & (i64 >> 40));
- i64out[3] = (byte)(0xff & (i64 >> 32));
- i64out[4] = (byte)(0xff & (i64 >> 24));
- i64out[5] = (byte)(0xff & (i64 >> 16));
- i64out[6] = (byte)(0xff & (i64 >> 8));
- i64out[7] = (byte)(0xff & (i64));
+ i64out[0] = (byte) (0xff & ((i64 >> 56) ^ 0x80));
+ i64out[1] = (byte) (0xff & (i64 >> 48));
+ i64out[2] = (byte) (0xff & (i64 >> 40));
+ i64out[3] = (byte) (0xff & (i64 >> 32));
+ i64out[4] = (byte) (0xff & (i64 >> 24));
+ i64out[5] = (byte) (0xff & (i64 >> 16));
+ i64out[6] = (byte) (0xff & (i64 >> 8));
+ i64out[7] = (byte) (0xff & (i64));
writeRawBytes(nonNullByte, 0, 1);
writeRawBytes(i64out, 0, 8);
}
+ @Override
public void writeDouble(double dub) throws TException {
long i64 = Double.doubleToLongBits(dub);
if ((i64 & (1L << 63)) != 0) {
// negative numbers, flip all bits
- i64out[0] = (byte)(0xff & ((i64 >> 56) ^ 0xff));
- i64out[1] = (byte)(0xff & ((i64 >> 48) ^ 0xff));
- i64out[2] = (byte)(0xff & ((i64 >> 40) ^ 0xff));
- i64out[3] = (byte)(0xff & ((i64 >> 32) ^ 0xff));
- i64out[4] = (byte)(0xff & ((i64 >> 24) ^ 0xff));
- i64out[5] = (byte)(0xff & ((i64 >> 16) ^ 0xff));
- i64out[6] = (byte)(0xff & ((i64 >> 8) ^ 0xff));
- i64out[7] = (byte)(0xff & ((i64) ^ 0xff));
+ i64out[0] = (byte) (0xff & ((i64 >> 56) ^ 0xff));
+ i64out[1] = (byte) (0xff & ((i64 >> 48) ^ 0xff));
+ i64out[2] = (byte) (0xff & ((i64 >> 40) ^ 0xff));
+ i64out[3] = (byte) (0xff & ((i64 >> 32) ^ 0xff));
+ i64out[4] = (byte) (0xff & ((i64 >> 24) ^ 0xff));
+ i64out[5] = (byte) (0xff & ((i64 >> 16) ^ 0xff));
+ i64out[6] = (byte) (0xff & ((i64 >> 8) ^ 0xff));
+ i64out[7] = (byte) (0xff & ((i64) ^ 0xff));
} else {
// positive numbers, flip just the first bit
- i64out[0] = (byte)(0xff & ((i64 >> 56) ^ 0x80));
- i64out[1] = (byte)(0xff & (i64 >> 48));
- i64out[2] = (byte)(0xff & (i64 >> 40));
- i64out[3] = (byte)(0xff & (i64 >> 32));
- i64out[4] = (byte)(0xff & (i64 >> 24));
- i64out[5] = (byte)(0xff & (i64 >> 16));
- i64out[6] = (byte)(0xff & (i64 >> 8));
- i64out[7] = (byte)(0xff & (i64));
+ i64out[0] = (byte) (0xff & ((i64 >> 56) ^ 0x80));
+ i64out[1] = (byte) (0xff & (i64 >> 48));
+ i64out[2] = (byte) (0xff & (i64 >> 40));
+ i64out[3] = (byte) (0xff & (i64 >> 32));
+ i64out[4] = (byte) (0xff & (i64 >> 24));
+ i64out[5] = (byte) (0xff & (i64 >> 16));
+ i64out[6] = (byte) (0xff & (i64 >> 8));
+ i64out[7] = (byte) (0xff & (i64));
}
writeRawBytes(nonNullByte, 0, 1);
writeRawBytes(i64out, 0, 8);
}
- final protected byte[] nullByte = new byte[] {0};
- final protected byte[] nonNullByte = new byte[] {1};
+ final protected byte[] nullByte = new byte[] { 0 };
+ final protected byte[] nonNullByte = new byte[] { 1 };
/**
- * The escaped byte sequence for the null byte.
- * This cannot be changed alone without changing the readString() code.
+ * The escaped byte sequence for the null byte. This cannot be changed alone
+ * without changing the readString() code.
*/
- final protected byte[] escapedNull = new byte[] {1,1};
+ final protected byte[] escapedNull = new byte[] { 1, 1 };
/**
- * The escaped byte sequence for the "\1" byte.
- * This cannot be changed alone without changing the readString() code.
+ * The escaped byte sequence for the "\1" byte. This cannot be changed alone
+ * without changing the readString() code.
*/
- final protected byte[] escapedOne = new byte[] {1,2};
+ final protected byte[] escapedOne = new byte[] { 1, 2 };
+
+ @Override
public void writeString(String str) throws TException {
byte[] dat;
try {
@@ -307,6 +344,7 @@
writeTextBytes(dat, 0, dat.length);
}
+ @Override
public void writeBinary(byte[] bin) throws TException {
if (bin == null) {
writeRawBytes(nullByte, 0, 1);
@@ -316,52 +354,69 @@
}
}
+ @Override
public TMessage readMessageBegin() throws TException {
- return new TMessage();
+ return new TMessage();
}
+ @Override
public void readMessageEnd() throws TException {
}
TStruct tstruct = new TStruct();
+
+ @Override
public TStruct readStructBegin() throws TException {
stackLevel++;
if (stackLevel == 1) {
topLevelStructFieldID = 0;
- ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-');
+ ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+ .charAt(topLevelStructFieldID) != '-');
} else {
// is this a null?
- // only read the is-null byte for level > 1 because the top-level struct can never be null.
- if (readIsNull()) return null;
+ // only read the is-null byte for level > 1 because the top-level struct
+ // can never be null.
+ if (readIsNull()) {
+ return null;
+ }
}
return tstruct;
}
+ @Override
public void readStructEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
TField f = null;
+
+ @Override
public TField readFieldBegin() throws TException {
- // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered.
- f = new TField("", ORDERED_TYPE, (short)-1);
- return f;
+ // slight hack to communicate to DynamicSerDe that the field ids are not
+ // being set but things are ordered.
+ f = new TField("", ORDERED_TYPE, (short) -1);
+ return f;
}
+ @Override
public void readFieldEnd() throws TException {
if (stackLevel == 1) {
- topLevelStructFieldID ++;
- ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-');
+ topLevelStructFieldID++;
+ ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+ .charAt(topLevelStructFieldID) != '-');
}
}
private TMap tmap = null;
+
/**
- * This method always return the same instance of TMap to avoid creating new instances.
- * It is the responsibility of the caller to read the value before calling this method again.
+ * This method always return the same instance of TMap to avoid creating new
+ * instances. It is the responsibility of the caller to read the value before
+ * calling this method again.
*/
+ @Override
public TMap readMapBegin() throws TException {
- stackLevel ++;
+ stackLevel++;
tmap = new TMap(ORDERED_TYPE, ORDERED_TYPE, readI32());
if (tmap.size == 0 && lastPrimitiveWasNull()) {
return null;
@@ -369,143 +424,162 @@
return tmap;
}
+ @Override
public void readMapEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
private TList tlist = null;
+
/**
- * This method always return the same instance of TList to avoid creating new instances.
- * It is the responsibility of the caller to read the value before calling this method again.
+ * This method always return the same instance of TList to avoid creating new
+ * instances. It is the responsibility of the caller to read the value before
+ * calling this method again.
*/
+ @Override
public TList readListBegin() throws TException {
- stackLevel ++;
+ stackLevel++;
tlist = new TList(ORDERED_TYPE, readI32());
if (tlist.size == 0 && lastPrimitiveWasNull()) {
return null;
- }
+ }
return tlist;
}
+ @Override
public void readListEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
private TSet set = null;
+
/**
- * This method always return the same instance of TSet to avoid creating new instances.
- * It is the responsibility of the caller to read the value before calling this method again.
+ * This method always return the same instance of TSet to avoid creating new
+ * instances. It is the responsibility of the caller to read the value before
+ * calling this method again.
*/
+ @Override
public TSet readSetBegin() throws TException {
- stackLevel ++;
+ stackLevel++;
set = new TSet(ORDERED_TYPE, readI32());
if (set.size == 0 && lastPrimitiveWasNull()) {
return null;
- }
+ }
return set;
}
+ @Override
public void readSetEnd() throws TException {
- stackLevel --;
+ stackLevel--;
}
-
+
// This method takes care of bit-flipping for descending order
- // Make this method final to improve performance.
+ // Make this method final to improve performance.
final private int readRawAll(byte[] buf, int off, int len) throws TException {
int bytes = trans_.readAll(buf, off, len);
if (!ascending) {
- for(int i=off; i<off+bytes; i++) {
- buf[i] = (byte)~buf[i];
+ for (int i = off; i < off + bytes; i++) {
+ buf[i] = (byte) ~buf[i];
}
}
return bytes;
}
+ @Override
public boolean readBool() throws TException {
readRawAll(bin, 0, 1);
lastPrimitiveWasNull = (bin[0] == 0);
return lastPrimitiveWasNull ? false : bin[0] == 2;
}
- private byte[] wasNull = new byte[1];
- final public boolean readIsNull() throws TException {
+ private final byte[] wasNull = new byte[1];
+
+ final public boolean readIsNull() throws TException {
readRawAll(wasNull, 0, 1);
lastPrimitiveWasNull = (wasNull[0] == 0);
return lastPrimitiveWasNull;
}
-
- private byte[] bin = new byte[1];
+
+ private final byte[] bin = new byte[1];
+
+ @Override
public byte readByte() throws TException {
- if (readIsNull()) return 0;
+ if (readIsNull()) {
+ return 0;
+ }
readRawAll(bin, 0, 1);
- return (byte)(bin[0] ^ 0x80);
+ return (byte) (bin[0] ^ 0x80);
}
- private byte[] i16rd = new byte[2];
+ private final byte[] i16rd = new byte[2];
+
+ @Override
public short readI16() throws TException {
- if (readIsNull()) return 0;
+ if (readIsNull()) {
+ return 0;
+ }
readRawAll(i16rd, 0, 2);
- return
- (short)
- ((((i16rd[0]^0x80) & 0xff) << 8) |
- ((i16rd[1] & 0xff)));
+ return (short) ((((i16rd[0] ^ 0x80) & 0xff) << 8) | ((i16rd[1] & 0xff)));
}
- private byte[] i32rd = new byte[4];
+ private final byte[] i32rd = new byte[4];
+
+ @Override
public int readI32() throws TException {
- if (readIsNull()) return 0;
+ if (readIsNull()) {
+ return 0;
+ }
readRawAll(i32rd, 0, 4);
- return
- (((i32rd[0]^0x80) & 0xff) << 24) |
- ((i32rd[1] & 0xff) << 16) |
- ((i32rd[2] & 0xff) << 8) |
- ((i32rd[3] & 0xff));
+ return (((i32rd[0] ^ 0x80) & 0xff) << 24) | ((i32rd[1] & 0xff) << 16)
+ | ((i32rd[2] & 0xff) << 8) | ((i32rd[3] & 0xff));
}
- private byte[] i64rd = new byte[8];
+ private final byte[] i64rd = new byte[8];
+
+ @Override
public long readI64() throws TException {
- if (readIsNull()) return 0;
+ if (readIsNull()) {
+ return 0;
+ }
readRawAll(i64rd, 0, 8);
- return
- ((long)((i64rd[0]^0x80) & 0xff) << 56) |
- ((long)(i64rd[1] & 0xff) << 48) |
- ((long)(i64rd[2] & 0xff) << 40) |
- ((long)(i64rd[3] & 0xff) << 32) |
- ((long)(i64rd[4] & 0xff) << 24) |
- ((long)(i64rd[5] & 0xff) << 16) |
- ((long)(i64rd[6] & 0xff) << 8) |
- ((long)(i64rd[7] & 0xff));
+ return ((long) ((i64rd[0] ^ 0x80) & 0xff) << 56)
+ | ((long) (i64rd[1] & 0xff) << 48) | ((long) (i64rd[2] & 0xff) << 40)
+ | ((long) (i64rd[3] & 0xff) << 32) | ((long) (i64rd[4] & 0xff) << 24)
+ | ((long) (i64rd[5] & 0xff) << 16) | ((long) (i64rd[6] & 0xff) << 8)
+ | ((i64rd[7] & 0xff));
}
+ @Override
public double readDouble() throws TException {
- if (readIsNull()) return 0;
+ if (readIsNull()) {
+ return 0;
+ }
readRawAll(i64rd, 0, 8);
long v = 0;
if ((i64rd[0] & 0x80) != 0) {
// Positive number
- v = ((long)((i64rd[0]^0x80) & 0xff) << 56) |
- ((long)(i64rd[1] & 0xff) << 48) |
- ((long)(i64rd[2] & 0xff) << 40) |
- ((long)(i64rd[3] & 0xff) << 32) |
- ((long)(i64rd[4] & 0xff) << 24) |
- ((long)(i64rd[5] & 0xff) << 16) |
- ((long)(i64rd[6] & 0xff) << 8) |
- ((long)(i64rd[7] & 0xff));
+ v = ((long) ((i64rd[0] ^ 0x80) & 0xff) << 56)
+ | ((long) (i64rd[1] & 0xff) << 48) | ((long) (i64rd[2] & 0xff) << 40)
+ | ((long) (i64rd[3] & 0xff) << 32) | ((long) (i64rd[4] & 0xff) << 24)
+ | ((long) (i64rd[5] & 0xff) << 16) | ((long) (i64rd[6] & 0xff) << 8)
+ | ((i64rd[7] & 0xff));
} else {
// Negative number
- v = ((long)((i64rd[0]^0xff) & 0xff) << 56) |
- ((long)((i64rd[1]^0xff) & 0xff) << 48) |
- ((long)((i64rd[2]^0xff) & 0xff) << 40) |
- ((long)((i64rd[3]^0xff) & 0xff) << 32) |
- ((long)((i64rd[4]^0xff) & 0xff) << 24) |
- ((long)((i64rd[5]^0xff) & 0xff) << 16) |
- ((long)((i64rd[6]^0xff) & 0xff) << 8) |
- ((long)((i64rd[7]^0xff) & 0xff));
+ v = ((long) ((i64rd[0] ^ 0xff) & 0xff) << 56)
+ | ((long) ((i64rd[1] ^ 0xff) & 0xff) << 48)
+ | ((long) ((i64rd[2] ^ 0xff) & 0xff) << 40)
+ | ((long) ((i64rd[3] ^ 0xff) & 0xff) << 32)
+ | ((long) ((i64rd[4] ^ 0xff) & 0xff) << 24)
+ | ((long) ((i64rd[5] ^ 0xff) & 0xff) << 16)
+ | ((long) ((i64rd[6] ^ 0xff) & 0xff) << 8)
+ | (((i64rd[7] ^ 0xff) & 0xff));
}
return Double.longBitsToDouble(v);
}
private byte[] stringBytes = new byte[1000];
+
+ @Override
public String readString() throws TException {
if (readIsNull()) {
return null;
@@ -520,11 +594,11 @@
if (bin[0] == 1) {
// Escaped byte, unescape it.
readRawAll(bin, 0, 1);
- assert(bin[0] == 1 || bin[0] == 2);
- bin[0] = (byte)(bin[0] - 1);
+ assert (bin[0] == 1 || bin[0] == 2);
+ bin[0] = (byte) (bin[0] - 1);
}
if (i == stringBytes.length) {
- stringBytes = Arrays.copyOf(stringBytes, stringBytes.length*2);
+ stringBytes = Arrays.copyOf(stringBytes, stringBytes.length * 2);
}
stringBytes[i] = bin[0];
i++;
@@ -537,15 +611,19 @@
}
}
+ @Override
public byte[] readBinary() throws TException {
int size = readI32();
- if (lastPrimitiveWasNull) return null;
+ if (lastPrimitiveWasNull) {
+ return null;
+ }
byte[] buf = new byte[size];
readRawAll(buf, 0, size);
return buf;
}
boolean lastPrimitiveWasNull;
+
public boolean lastPrimitiveWasNull() throws TException {
return lastPrimitiveWasNull;
}
@@ -554,7 +632,6 @@
writeRawBytes(nullByte, 0, 1);
}
-
void writeTextBytes(byte[] bytes, int start, int length) throws TException {
writeRawBytes(nonNullByte, 0, 1);
int begin = 0;
@@ -563,7 +640,7 @@
if (bytes[i] == 0 || bytes[i] == 1) {
// Write the first part of the array
if (i > begin) {
- writeRawBytes(bytes, begin, i-begin);
+ writeRawBytes(bytes, begin, i - begin);
}
// Write the escaped byte.
if (bytes[i] == 0) {
@@ -573,19 +650,19 @@
}
// Move the pointer to the next byte, since we have written
// out the escaped byte in the block above already.
- begin = i+1;
+ begin = i + 1;
}
}
// Write the remaining part of the array
if (i > begin) {
- writeRawBytes(bytes, begin, i-begin);
+ writeRawBytes(bytes, begin, i - begin);
}
// Write the terminating NULL byte
writeRawBytes(nullByte, 0, 1);
}
-
+
public void writeText(Text text) throws TException {
writeTextBytes(text.getBytes(), 0, text.getLength());
}
-
+
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Thu Jan 21 09:52:44 2010
@@ -16,42 +16,49 @@
* limitations under the License.
*/
-
package org.apache.hadoop.hive.serde2.thrift;
+import java.io.EOFException;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.NoSuchElementException;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.Text;
-
import org.apache.thrift.TException;
-import org.apache.thrift.transport.*;
-import org.apache.thrift.*;
-import org.apache.thrift.protocol.*;
-import java.util.*;
-import java.util.regex.Pattern;
-import java.util.regex.Matcher;
-import java.io.*;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-
-import org.apache.hadoop.conf.Configuration;
-import java.util.Properties;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TMessage;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TProtocolFactory;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TStruct;
+import org.apache.thrift.protocol.TType;
+import org.apache.thrift.transport.TTransport;
+import org.apache.thrift.transport.TTransportException;
/**
- *
- * An implementation of the Thrift Protocol for ctl separated
- * records.
- * This is not thrift compliant in that it doesn't write out field ids
- * so things cannot actually be versioned.
+ *
+ * An implementation of the Thrift Protocol for ctl separated records. This is
+ * not thrift compliant in that it doesn't write out field ids so things cannot
+ * actually be versioned.
*/
-public class TCTLSeparatedProtocol extends TProtocol
- implements ConfigurableTProtocol, WriteNullsProtocol, SkippableTProtocol {
+public class TCTLSeparatedProtocol extends TProtocol implements
+ ConfigurableTProtocol, WriteNullsProtocol, SkippableTProtocol {
- final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName());
+ final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class
+ .getName());
+
+ static byte ORDERED_TYPE = (byte) -1;
- static byte ORDERED_TYPE = (byte)-1;
-
/**
* Factory for JSON protocol objects
*/
@@ -83,20 +90,30 @@
protected Pattern mapPattern;
/**
- * The quote character when supporting quotes with ability to not split across quoted entries. Like csv.
- * Note that escaping the quote is not currently supported.
+ * The quote character when supporting quotes with ability to not split across
+ * quoted entries. Like csv. Note that escaping the quote is not currently
+ * supported.
*/
protected String quote;
-
/**
* Inspect the separators this instance is configured with.
*/
- public String getPrimarySeparator() { return primarySeparator; }
- public String getSecondarySeparator() { return secondarySeparator; }
- public String getRowSeparator() { return rowSeparator; }
- public String getMapSeparator() { return mapSeparator; }
+ public String getPrimarySeparator() {
+ return primarySeparator;
+ }
+
+ public String getSecondarySeparator() {
+ return secondarySeparator;
+ }
+ public String getRowSeparator() {
+ return rowSeparator;
+ }
+
+ public String getMapSeparator() {
+ return mapSeparator;
+ }
/**
* The transport stream is tokenized on the row separator
@@ -124,7 +141,6 @@
*/
protected int innerIndex;
-
/**
* Is this the first field we're writing
*/
@@ -135,41 +151,38 @@
*/
protected boolean firstInnerField;
-
/**
* Are we writing a map and need to worry about k/v separator?
*/
protected boolean isMap;
-
/**
- * For writes, on what element are we on so we know when to use normal list separator or
- * for a map know when to use the k/v separator
+ * For writes, on what element are we on so we know when to use normal list
+ * separator or for a map know when to use the k/v separator
*/
protected long elemIndex;
-
/**
* Are we currently on the top-level columns or parsing a column itself
*/
protected boolean inner;
-
/**
- * For places where the separators are back to back, should we return a null or an empty string since it is ambiguous.
- * This also applies to extra columns that are read but aren't in the current record.
+ * For places where the separators are back to back, should we return a null
+ * or an empty string since it is ambiguous. This also applies to extra
+ * columns that are read but aren't in the current record.
*/
protected boolean returnNulls;
/**
* The transport being wrapped.
- *
+ *
*/
final protected TTransport innerTransport;
-
/**
- * Strings used to lookup the various configurable paramaters of this protocol.
+ * Strings used to lookup the various configurable paramaters of this
+ * protocol.
*/
public final static String ReturnNullsKey = "separators.return_nulls";
public final static String BufferSizeKey = "separators.buffer_size";
@@ -186,10 +199,9 @@
/**
* The nullString in UTF-8 bytes
- */
+ */
protected Text nullText;
-
/**
* A convenience class for tokenizing a TTransport
*/
@@ -201,63 +213,65 @@
final String separator;
byte buf[];
- public SimpleTransportTokenizer(TTransport trans, String separator, int buffer_length) {
+ public SimpleTransportTokenizer(TTransport trans, String separator,
+ int buffer_length) {
this.trans = trans;
this.separator = separator;
buf = new byte[buffer_length];
- // do not fill tokenizer until user requests since filling it could read in data
+ // do not fill tokenizer until user requests since filling it could read
+ // in data
// not meant for this instantiation.
fillTokenizer();
}
private boolean fillTokenizer() {
try {
- int length = trans.read(buf, 0, buf.length);
- if(length <=0 ) {
- tokenizer = new StringTokenizer("", separator, true);
- return false;
- }
- String row;
- try {
- row = Text.decode(buf, 0, length);
- } catch (CharacterCodingException e) {
- throw new RuntimeException(e);
- }
- tokenizer = new StringTokenizer(row, separator, true);
- } catch(TTransportException e) {
- e.printStackTrace();
- tokenizer = null;
+ int length = trans.read(buf, 0, buf.length);
+ if (length <= 0) {
+ tokenizer = new StringTokenizer("", separator, true);
return false;
}
- return true;
+ String row;
+ try {
+ row = Text.decode(buf, 0, length);
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException(e);
+ }
+ tokenizer = new StringTokenizer(row, separator, true);
+ } catch (TTransportException e) {
+ e.printStackTrace();
+ tokenizer = null;
+ return false;
+ }
+ return true;
}
public String nextToken() throws EOFException {
StringBuffer ret = null;
boolean done = false;
- if(tokenizer == null) {
+ if (tokenizer == null) {
fillTokenizer();
}
- while(! done) {
+ while (!done) {
- if(! tokenizer.hasMoreTokens()) {
- if(! fillTokenizer()) {
+ if (!tokenizer.hasMoreTokens()) {
+ if (!fillTokenizer()) {
break;
}
}
try {
final String nextToken = tokenizer.nextToken();
- if(nextToken.equals(separator)) {
+ if (nextToken.equals(separator)) {
done = true;
- } else if(ret == null) {
+ } else if (ret == null) {
ret = new StringBuffer(nextToken);
} else {
ret.append(nextToken);
}
- } catch(NoSuchElementException e) {
+ } catch (NoSuchElementException e) {
if (ret == null) {
throw new EOFException(e.getMessage());
}
@@ -269,34 +283,45 @@
}
};
-
/**
- * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and to return empty strings for empty fields.
- *
- * @param trans - the ttransport to use as input or output
- *
+ * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and
+ * to return empty strings for empty fields.
+ *
+ * @param trans
+ * - the ttransport to use as input or output
+ *
*/
public TCTLSeparatedProtocol(TTransport trans) {
- this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, true, 4096);
+ this(trans, defaultPrimarySeparator, defaultSecondarySeparator,
+ defaultMapSeparator, defaultRowSeparator, true, 4096);
}
public TCTLSeparatedProtocol(TTransport trans, int buffer_size) {
- this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, true, buffer_size);
+ this(trans, defaultPrimarySeparator, defaultSecondarySeparator,
+ defaultMapSeparator, defaultRowSeparator, true, buffer_size);
}
/**
- * @param trans - the ttransport to use as input or output
- * @param primarySeparator the separator between columns (aka fields)
- * @param secondarySeparator the separator within a field for things like sets and maps and lists
- * @param mapSeparator - the key/value separator
- * @param rowSeparator - the record separator
- * @param returnNulls - whether to return a null or an empty string for fields that seem empty (ie two primary separators back to back)
- */
-
- public TCTLSeparatedProtocol(TTransport trans, String primarySeparator, String secondarySeparator, String mapSeparator, String rowSeparator,
- boolean returnNulls,
- int bufferSize) {
+ * @param trans
+ * - the ttransport to use as input or output
+ * @param primarySeparator
+ * the separator between columns (aka fields)
+ * @param secondarySeparator
+ * the separator within a field for things like sets and maps and
+ * lists
+ * @param mapSeparator
+ * - the key/value separator
+ * @param rowSeparator
+ * - the record separator
+ * @param returnNulls
+ * - whether to return a null or an empty string for fields that seem
+ * empty (ie two primary separators back to back)
+ */
+
+ public TCTLSeparatedProtocol(TTransport trans, String primarySeparator,
+ String secondarySeparator, String mapSeparator, String rowSeparator,
+ boolean returnNulls, int bufferSize) {
super(trans);
this.returnNulls = returnNulls;
@@ -306,51 +331,52 @@
this.rowSeparator = rowSeparator;
this.mapSeparator = mapSeparator;
- this.innerTransport = trans;
+ innerTransport = trans;
this.bufferSize = bufferSize;
- this.nullString = "\\N";
+ nullString = "\\N";
}
-
-
/**
* Sets the internal separator patterns and creates the internal tokenizer.
*/
protected void internalInitialize() {
- // in the future could allow users to specify a quote character that doesn't need escaping but for now ...
- final String primaryPatternString =
- quote == null ? primarySeparator :
- "(?:^|" + primarySeparator + ")(" + quote + "(?:[^" + quote + "]+|" + quote + quote + ")*" + quote + "|[^" + primarySeparator + "]*)";
+ // in the future could allow users to specify a quote character that doesn't
+ // need escaping but for now ...
+ final String primaryPatternString = quote == null ? primarySeparator
+ : "(?:^|" + primarySeparator + ")(" + quote + "(?:[^" + quote + "]+|"
+ + quote + quote + ")*" + quote + "|[^" + primarySeparator + "]*)";
if (quote != null) {
stripSeparatorPrefix = Pattern.compile("^" + primarySeparator);
stripQuotePrefix = Pattern.compile("^" + quote);
stripQuotePostfix = Pattern.compile(quote + "$");
- }
+ }
primaryPattern = Pattern.compile(primaryPatternString);
secondaryPattern = Pattern.compile(secondarySeparator);
mapPattern = Pattern.compile(secondarySeparator + "|" + mapSeparator);
nullText = new Text(nullString);
- transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparator, bufferSize);
+ transportTokenizer = new SimpleTransportTokenizer(innerTransport,
+ rowSeparator, bufferSize);
}
/**
- * For quoted fields, strip away the quotes and also need something to strip away the control separator when using
- * complex split method defined here.
+ * For quoted fields, strip away the quotes and also need something to strip
+ * away the control separator when using complex split method defined here.
*/
protected Pattern stripSeparatorPrefix;
protected Pattern stripQuotePrefix;
protected Pattern stripQuotePostfix;
-
- /**
- *
+ /**
+ *
* Split the line based on a complex regex pattern
- *
- * @param line the current row
- * @param p the pattern for matching fields in the row
+ *
+ * @param line
+ * the current row
+ * @param p
+ * the pattern for matching fields in the row
* @return List of Strings - not including the separator in them
*/
protected String[] complexSplit(String line, Pattern p) {
@@ -360,104 +386,116 @@
// For each field
while (m.find()) {
String match = m.group();
- if (match == null)
+ if (match == null) {
break;
- if (match.length() == 0)
+ }
+ if (match.length() == 0) {
match = null;
- else {
- if(stripSeparatorPrefix.matcher(match).find()) {
+ } else {
+ if (stripSeparatorPrefix.matcher(match).find()) {
match = match.substring(1);
}
- if(stripQuotePrefix.matcher(match).find()) {
+ if (stripQuotePrefix.matcher(match).find()) {
match = match.substring(1);
}
- if(stripQuotePostfix.matcher(match).find()) {
- match = match.substring(0,match.length() - 1);
+ if (stripQuotePostfix.matcher(match).find()) {
+ match = match.substring(0, match.length() - 1);
}
}
list.add(match);
}
- return (String[])list.toArray(new String[1]);
+ return list.toArray(new String[1]);
}
-
-
protected String getByteValue(String altValue, String defaultVal) {
if (altValue != null && altValue.length() > 0) {
try {
- byte b [] = new byte[1];
+ byte b[] = new byte[1];
b[0] = Byte.valueOf(altValue).byteValue();
return new String(b);
- } catch(NumberFormatException e) {
+ } catch (NumberFormatException e) {
return altValue;
}
}
return defaultVal;
}
-
/**
* Initialize the TProtocol
- * @param conf System properties
- * @param tbl table properties
+ *
+ * @param conf
+ * System properties
+ * @param tbl
+ * table properties
* @throws TException
*/
public void initialize(Configuration conf, Properties tbl) throws TException {
-
- primarySeparator = getByteValue(tbl.getProperty(Constants.FIELD_DELIM), primarySeparator);
- secondarySeparator = getByteValue(tbl.getProperty(Constants.COLLECTION_DELIM), secondarySeparator);
- rowSeparator = getByteValue(tbl.getProperty(Constants.LINE_DELIM), rowSeparator);
- mapSeparator = getByteValue(tbl.getProperty(Constants.MAPKEY_DELIM), mapSeparator);
- returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue();
- bufferSize = Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
- nullString = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
- quote = tbl.getProperty(Constants.QUOTE_CHAR, null);
+ primarySeparator = getByteValue(tbl.getProperty(Constants.FIELD_DELIM),
+ primarySeparator);
+ secondarySeparator = getByteValue(tbl
+ .getProperty(Constants.COLLECTION_DELIM), secondarySeparator);
+ rowSeparator = getByteValue(tbl.getProperty(Constants.LINE_DELIM),
+ rowSeparator);
+ mapSeparator = getByteValue(tbl.getProperty(Constants.MAPKEY_DELIM),
+ mapSeparator);
+ returnNulls = Boolean.valueOf(
+ tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls)))
+ .booleanValue();
+ bufferSize = Integer.valueOf(
+ tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
+ nullString = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
+ quote = tbl.getProperty(Constants.QUOTE_CHAR, null);
internalInitialize();
}
+ @Override
public void writeMessageBegin(TMessage message) throws TException {
}
+ @Override
public void writeMessageEnd() throws TException {
}
+ @Override
public void writeStructBegin(TStruct struct) throws TException {
firstField = true;
}
+ @Override
public void writeStructEnd() throws TException {
- // We don't write rowSeparatorByte because that should be handled by file format.
+ // We don't write rowSeparatorByte because that should be handled by file
+ // format.
}
+ @Override
public void writeFieldBegin(TField field) throws TException {
- if(! firstField) {
+ if (!firstField) {
internalWriteString(primarySeparator);
}
firstField = false;
}
+ @Override
public void writeFieldEnd() throws TException {
}
+ @Override
public void writeFieldStop() {
}
+ @Override
public void writeMapBegin(TMap map) throws TException {
// nesting not allowed!
- if(map.keyType == TType.STRUCT ||
- map.keyType == TType.MAP ||
- map.keyType == TType.LIST ||
- map.keyType == TType.SET) {
+ if (map.keyType == TType.STRUCT || map.keyType == TType.MAP
+ || map.keyType == TType.LIST || map.keyType == TType.SET) {
throw new TException("Not implemented: nested structures");
}
// nesting not allowed!
- if(map.valueType == TType.STRUCT ||
- map.valueType == TType.MAP ||
- map.valueType == TType.LIST ||
- map.valueType == TType.SET) {
+ if (map.valueType == TType.STRUCT || map.valueType == TType.MAP
+ || map.valueType == TType.LIST || map.valueType == TType.SET) {
throw new TException("Not implemented: nested structures");
}
@@ -467,71 +505,80 @@
elemIndex = 0;
}
+ @Override
public void writeMapEnd() throws TException {
isMap = false;
inner = false;
}
+ @Override
public void writeListBegin(TList list) throws TException {
- if(list.elemType == TType.STRUCT ||
- list.elemType == TType.MAP ||
- list.elemType == TType.LIST ||
- list.elemType == TType.SET) {
+ if (list.elemType == TType.STRUCT || list.elemType == TType.MAP
+ || list.elemType == TType.LIST || list.elemType == TType.SET) {
throw new TException("Not implemented: nested structures");
}
firstInnerField = true;
inner = true;
}
+ @Override
public void writeListEnd() throws TException {
inner = false;
}
-
+
+ @Override
public void writeSetBegin(TSet set) throws TException {
- if(set.elemType == TType.STRUCT ||
- set.elemType == TType.MAP ||
- set.elemType == TType.LIST ||
- set.elemType == TType.SET) {
+ if (set.elemType == TType.STRUCT || set.elemType == TType.MAP
+ || set.elemType == TType.LIST || set.elemType == TType.SET) {
throw new TException("Not implemented: nested structures");
}
firstInnerField = true;
inner = true;
}
+ @Override
public void writeSetEnd() throws TException {
inner = false;
}
+ @Override
public void writeBool(boolean b) throws TException {
writeString(String.valueOf(b));
}
// for writing out single byte
- private byte buf[] = new byte[1];
+ private final byte buf[] = new byte[1];
+
+ @Override
public void writeByte(byte b) throws TException {
buf[0] = b;
trans_.write(buf);
}
+ @Override
public void writeI16(short i16) throws TException {
writeString(String.valueOf(i16));
}
+ @Override
public void writeI32(int i32) throws TException {
writeString(String.valueOf(i32));
}
+ @Override
public void writeI64(long i64) throws TException {
writeString(String.valueOf(i64));
}
+ @Override
public void writeDouble(double dub) throws TException {
writeString(String.valueOf(dub));
}
Text tmpText = new Text();
- public void internalWriteString(String str) throws TException {
- if(str != null) {
+
+ public void internalWriteString(String str) throws TException {
+ if (str != null) {
tmpText.set(str);
trans_.write(tmpText.getBytes(), 0, tmpText.getLength());
} else {
@@ -539,11 +586,13 @@
}
}
+ @Override
public void writeString(String str) throws TException {
- if(inner) {
- if(!firstInnerField) {
- // super hack city notice the mod plus only happens after firstfield hit, so == 0 is right.
- if(isMap && elemIndex++ % 2 == 0) {
+ if (inner) {
+ if (!firstInnerField) {
+ // Super hack city: notice that the mod-plus-increment only happens after the
+ // first inner field has been hit, so == 0 is right.
+ if (isMap && elemIndex++ % 2 == 0) {
internalWriteString(mapSeparator);
} else {
internalWriteString(secondarySeparator);
@@ -555,72 +604,79 @@
internalWriteString(str);
}
+ @Override
public void writeBinary(byte[] bin) throws TException {
- throw new TException("Ctl separated protocol cannot support writing Binary data!");
+ throw new TException(
+ "Ctl separated protocol cannot support writing Binary data!");
}
+ @Override
public TMessage readMessageBegin() throws TException {
- return new TMessage();
+ return new TMessage();
}
+ @Override
public void readMessageEnd() throws TException {
}
- public TStruct readStructBegin() throws TException {
- assert(!inner);
- try {
- final String tmp = transportTokenizer.nextToken();
- columns = quote == null ? primaryPattern.split(tmp) : complexSplit(tmp, primaryPattern);
- index = 0;
- return new TStruct();
- } catch(EOFException e) {
- return null;
- }
+ @Override
+ public TStruct readStructBegin() throws TException {
+ assert (!inner);
+ try {
+ final String tmp = transportTokenizer.nextToken();
+ columns = quote == null ? primaryPattern.split(tmp) : complexSplit(tmp,
+ primaryPattern);
+ index = 0;
+ return new TStruct();
+ } catch (EOFException e) {
+ return null;
+ }
}
+ @Override
public void readStructEnd() throws TException {
columns = null;
}
-
/**
- * Skip past the current field
- * Just increments the field index counter.
+ * Skip past the current field. Just increments the field index counter.
*/
public void skip(byte type) {
- if( inner) {
+ if (inner) {
innerIndex++;
} else {
index++;
}
}
-
+ @Override
public TField readFieldBegin() throws TException {
- assert( !inner);
- TField f = new TField("", ORDERED_TYPE, (short)-1);
- // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered.
- return f;
+ assert (!inner);
+ TField f = new TField("", ORDERED_TYPE, (short) -1);
+ // slight hack to communicate to DynamicSerDe that the field ids are not
+ // being set but things are ordered.
+ return f;
}
+ @Override
public void readFieldEnd() throws TException {
fields = null;
}
+ @Override
public TMap readMapBegin() throws TException {
- assert( !inner);
+ assert (!inner);
TMap map = new TMap();
- if(columns[index] == null ||
- columns[index].equals(nullString)) {
+ if (columns[index] == null || columns[index].equals(nullString)) {
index++;
- if(returnNulls) {
+ if (returnNulls) {
return null;
}
- } else if(columns[index].isEmpty()) {
+ } else if (columns[index].isEmpty()) {
index++;
} else {
fields = mapPattern.split(columns[index++]);
- map = new TMap(ORDERED_TYPE, ORDERED_TYPE, fields.length/2);
+ map = new TMap(ORDERED_TYPE, ORDERED_TYPE, fields.length / 2);
}
innerIndex = 0;
inner = true;
@@ -628,21 +684,22 @@
return map;
}
+ @Override
public void readMapEnd() throws TException {
inner = false;
isMap = false;
}
+ @Override
public TList readListBegin() throws TException {
- assert( !inner);
+ assert (!inner);
TList list = new TList();
- if(columns[index] == null ||
- columns[index].equals(nullString)) {
+ if (columns[index] == null || columns[index].equals(nullString)) {
index++;
- if(returnNulls) {
+ if (returnNulls) {
return null;
}
- } else if(columns[index].isEmpty()) {
+ } else if (columns[index].isEmpty()) {
index++;
} else {
fields = secondaryPattern.split(columns[index++]);
@@ -653,20 +710,21 @@
return list;
}
+ @Override
public void readListEnd() throws TException {
inner = false;
}
+ @Override
public TSet readSetBegin() throws TException {
- assert( !inner);
+ assert (!inner);
TSet set = new TSet();
- if(columns[index] == null ||
- columns[index].equals(nullString)) {
+ if (columns[index] == null || columns[index].equals(nullString)) {
index++;
- if(returnNulls) {
+ if (returnNulls) {
return null;
}
- } else if(columns[index].isEmpty()) {
+ } else if (columns[index].isEmpty()) {
index++;
} else {
fields = secondaryPattern.split(columns[index++]);
@@ -687,16 +745,20 @@
writeString(null);
}
+ @Override
public void readSetEnd() throws TException {
inner = false;
}
+ @Override
public boolean readBool() throws TException {
String val = readString();
lastPrimitiveWasNullFlag = val == null;
- return val == null || val.isEmpty() ? false : Boolean.valueOf(val).booleanValue();
+ return val == null || val.isEmpty() ? false : Boolean.valueOf(val)
+ .booleanValue();
}
+ @Override
public byte readByte() throws TException {
String val = readString();
lastPrimitiveWasNullFlag = val == null;
@@ -708,6 +770,7 @@
}
}
+ @Override
public short readI16() throws TException {
String val = readString();
lastPrimitiveWasNullFlag = val == null;
@@ -719,6 +782,7 @@
}
}
+ @Override
public int readI32() throws TException {
String val = readString();
lastPrimitiveWasNullFlag = val == null;
@@ -730,6 +794,7 @@
}
}
+ @Override
public long readI64() throws TException {
String val = readString();
lastPrimitiveWasNullFlag = val == null;
@@ -741,32 +806,38 @@
}
}
+ @Override
public double readDouble() throws TException {
String val = readString();
lastPrimitiveWasNullFlag = val == null;
try {
- return val == null || val.isEmpty() ? 0 :Double.valueOf(val).doubleValue();
+ return val == null || val.isEmpty() ? 0 : Double.valueOf(val)
+ .doubleValue();
} catch (NumberFormatException e) {
lastPrimitiveWasNullFlag = true;
return 0;
}
}
+ @Override
public String readString() throws TException {
String ret;
- if(!inner) {
- ret = columns != null && index < columns.length ? columns[index] : null;
+ if (!inner) {
+ ret = columns != null && index < columns.length ? columns[index] : null;
index++;
} else {
- ret = fields != null && innerIndex < fields.length ? fields[innerIndex] : null;
+ ret = fields != null && innerIndex < fields.length ? fields[innerIndex]
+ : null;
innerIndex++;
}
- if(ret == null || ret.equals(nullString))
+ if (ret == null || ret.equals(nullString)) {
return returnNulls ? null : "";
- else
+ } else {
return ret;
+ }
}
+ @Override
public byte[] readBinary() throws TException {
throw new TException("Not implemented for control separated data");
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java Thu Jan 21 09:52:44 2010
@@ -20,25 +20,23 @@
import org.apache.thrift.protocol.TProtocolFactory;
-
public class TReflectionUtils {
public static final String thriftReaderFname = "read";
public static final String thriftWriterFname = "write";
- public static final Class<?> [] thriftRWParams;
+ public static final Class<?>[] thriftRWParams;
static {
try {
- thriftRWParams = new Class [] {
- Class.forName("org.apache.thrift.protocol.TProtocol")
- };
+ thriftRWParams = new Class[] { Class
+ .forName("org.apache.thrift.protocol.TProtocol") };
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
}
public static TProtocolFactory getProtocolFactoryByName(String protocolName)
- throws Exception {
+ throws Exception {
Class<?> protoClass = Class.forName(protocolName + "$Factory");
- return ((TProtocolFactory)protoClass.newInstance());
+ return ((TProtocolFactory) protoClass.newInstance());
}
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java Thu Jan 21 09:52:44 2010
@@ -26,7 +26,6 @@
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.Writable;
-
import org.apache.thrift.TBase;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.protocol.TProtocolFactory;
@@ -37,19 +36,24 @@
protected TIOStreamTransport outTransport, inTransport;
protected TProtocol outProtocol, inProtocol;
- private void init(TProtocolFactory inFactory, TProtocolFactory outFactory) throws Exception {
+ private void init(TProtocolFactory inFactory, TProtocolFactory outFactory)
+ throws Exception {
outTransport = new TIOStreamTransport(bos);
inTransport = new TIOStreamTransport(bis);
outProtocol = outFactory.getProtocol(outTransport);
inProtocol = inFactory.getProtocol(inTransport);
}
- public void initialize(Configuration job, Properties tbl) throws SerDeException {
- throw new SerDeException("ThriftByteStreamTypedSerDe is still semi-abstract");
+ @Override
+ public void initialize(Configuration job, Properties tbl)
+ throws SerDeException {
+ throw new SerDeException(
+ "ThriftByteStreamTypedSerDe is still semi-abstract");
}
- public ThriftByteStreamTypedSerDe(Type objectType, TProtocolFactory inFactory,
- TProtocolFactory outFactory) throws SerDeException {
+ public ThriftByteStreamTypedSerDe(Type objectType,
+ TProtocolFactory inFactory, TProtocolFactory outFactory)
+ throws SerDeException {
super(objectType);
try {
init(inFactory, outFactory);
@@ -58,14 +62,16 @@
}
}
+ @Override
protected ObjectInspectorFactory.ObjectInspectorOptions getObjectInspectorOptions() {
return ObjectInspectorFactory.ObjectInspectorOptions.THRIFT;
}
-
+
+ @Override
public Object deserialize(Writable field) throws SerDeException {
Object obj = super.deserialize(field);
try {
- ((TBase)obj).read(inProtocol);
+ ((TBase) obj).read(inProtocol);
} catch (Exception e) {
throw new SerDeException(e);
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java Thu Jan 21 09:52:44 2010
@@ -31,27 +31,33 @@
private ThriftByteStreamTypedSerDe tsd;
- public ThriftDeserializer() { }
-
- public void initialize(Configuration job, Properties tbl) throws SerDeException {
+ public ThriftDeserializer() {
+ }
+
+ public void initialize(Configuration job, Properties tbl)
+ throws SerDeException {
try {
// both the classname and the protocol name are Table properties
// the only hardwired assumption is that records are fixed on a
// per Table basis
- String className = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS);
+ String className = tbl
+ .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS);
Class<?> recordClass = job.getClassByName(className);
- String protoName = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
+ String protoName = tbl
+ .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
if (protoName == null) {
protoName = "TBinaryProtocol";
}
// For backward compatibility
- protoName = protoName.replace("com.facebook.thrift.protocol", "org.apache.thrift.protocol");
+ protoName = protoName.replace("com.facebook.thrift.protocol",
+ "org.apache.thrift.protocol");
- TProtocolFactory tp = TReflectionUtils.getProtocolFactoryByName(protoName);
+ TProtocolFactory tp = TReflectionUtils
+ .getProtocolFactoryByName(protoName);
tsd = new ThriftByteStreamTypedSerDe(recordClass, tp, tp);
-
+
} catch (Exception e) {
throw new SerDeException(e);
}
@@ -60,7 +66,7 @@
public Object deserialize(Writable field) throws SerDeException {
return tsd.deserialize(field);
}
-
+
public ObjectInspector getObjectInspector() throws SerDeException {
return tsd.getObjectInspector();
}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java Thu Jan 21 09:52:44 2010
@@ -21,23 +21,22 @@
import org.apache.thrift.TException;
/**
- * An interface for TProtocols that actually write out nulls -
- * This should be for all those that don't actually use
- * fieldids in the written data like TCTLSeparatedProtocol.
+ * An interface for TProtocols that actually write out nulls. This should be
+ * for all those that don't actually use field ids in the written data, like
+ * TCTLSeparatedProtocol.
*
*/
public interface WriteNullsProtocol {
/**
- * Was the last primitive read really a NULL. Need
- * only be called when the value of the primitive
- * was 0. ie the protocol should return 0 on nulls
- * and the caller will then check if it was actually null
- * For boolean this is false.
+ * Was the last primitive read really a NULL? Need only be called when the
+ * value of the primitive was 0, i.e. the protocol should return 0 on nulls and
+ * the caller will then check if it was actually null. For boolean this is
+ * false.
*/
public boolean lastPrimitiveWasNull() throws TException;
/**
- * Write a null
+ * Write a null
*/
public void writeNull() throws TException;