You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2010/01/21 10:52:48 UTC
svn commit: r901625 [2/6] - in /hadoop/hive/trunk: ./ serde/src/java/org/apache/hadoop/hive/serde2/ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ serde/src/java/org/apache/hadoop/hiv...

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefWritable.java Thu Jan 21 09:52:44 2010
@@ -39,9 +39,8 @@
   int start = 0;
   int length = 0;
   byte[] bytes = null;
-  
+
   LazyDecompressionCallback lazyDecompressObj;
-  
 
   /**
    * Create a zero-size bytes.
@@ -78,7 +77,7 @@
     start = offset;
     length = len;
   }
-  
+
   /**
    * Create a BytesRefWritable referenced to one section of the given bytes. The
    * argument <tt>lazyDecompressData</tt> refers to a LazyDecompressionCallback
@@ -86,7 +85,8 @@
    * uncompressed bytes of <tt>lazyDecompressData</tt>. Use <tt>offset</tt> and
    * <tt>len</tt> after uncompressing the data.
    */
-  public BytesRefWritable(LazyDecompressionCallback lazyDecompressData, int offset, int len) {
+  public BytesRefWritable(LazyDecompressionCallback lazyDecompressData,
+      int offset, int len) {
     lazyDecompressObj = lazyDecompressData;
     start = offset;
     length = len;
@@ -102,7 +102,7 @@
    * Returns a copy of the underlying bytes referenced by this instance.
    * 
    * @return a new copied byte array
-   * @throws IOException 
+   * @throws IOException
    */
   public byte[] getBytesCopy() throws IOException {
     lazyDecompress();
@@ -113,7 +113,8 @@
 
   /**
    * Returns the underlying bytes.
-   * @throws IOException 
+   * 
+   * @throws IOException
    */
   public byte[] getData() throws IOException {
     lazyDecompress();
@@ -132,7 +133,7 @@
     length = len;
     lazyDecompressObj = null;
   }
-  
+
   /**
    * readFields() will corrupt the array. So use the set method whenever
    * possible.
@@ -176,11 +177,13 @@
   }
 
   /** {@inheritDoc} */
+  @Override
   public int hashCode() {
     return super.hashCode();
   }
 
   /** {@inheritDoc} */
+  @Override
   public String toString() {
     StringBuffer sb = new StringBuffer(3 * length);
     for (int idx = start; idx < length; idx++) {
@@ -201,22 +204,26 @@
   /** {@inheritDoc} */
   @Override
   public int compareTo(BytesRefWritable other) {
-    if (other == null)
+    if (other == null) {
       throw new IllegalArgumentException("Argument can not be null.");
-    if (this == other)
+    }
+    if (this == other) {
       return 0;
+    }
     try {
-      return WritableComparator.compareBytes(getData(), start, getLength(), other
-          .getData(), other.start, other.getLength());
+      return WritableComparator.compareBytes(getData(), start, getLength(),
+          other.getData(), other.start, other.getLength());
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
   }
 
   /** {@inheritDoc} */
+  @Override
   public boolean equals(Object right_obj) {
-    if (right_obj == null || !(right_obj instanceof BytesRefWritable))
+    if (right_obj == null || !(right_obj instanceof BytesRefWritable)) {
       return false;
+    }
     return compareTo((BytesRefWritable) right_obj) == 0;
   }
 

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java Thu Jan 21 09:52:44 2010
@@ -26,8 +26,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -55,6 +55,7 @@
   // We need some initial values in case user don't call initialize()
   private ObjectInspector cachedObjectInspector;
 
+  @Override
   public String toString() {
     return getClass().toString()
         + "["
@@ -89,14 +90,14 @@
     // ColumnarObject uses same ObjectInpector as LazyStruct
     cachedObjectInspector = LazyFactory.createColumnarStructInspector(
         serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams
-            .getSeparators(), serdeParams.getNullSequence(), serdeParams.isEscaped(),
-            serdeParams.getEscapeChar());
+            .getSeparators(), serdeParams.getNullSequence(), serdeParams
+            .isEscaped(), serdeParams.getEscapeChar());
+
+    java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils
+        .getReadColumnIDs(job);
 
-    
-    java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(job);
-    
     cachedLazyStruct = new ColumnarStruct(cachedObjectInspector, notSkipIDs);
-    
+
     int size = serdeParams.getColumnTypes().size();
     field = new BytesRefWritable[size];
     for (int i = 0; i < size; i++) {
@@ -204,17 +205,17 @@
             && (declaredFields == null || declaredFields.get(i)
                 .getFieldObjectInspector().getCategory().equals(
                     Category.PRIMITIVE))) {
-          LazySimpleSerDe.serialize(serializeStream, 
-              SerDeUtils.getJSONString(f, foi),
+          LazySimpleSerDe.serialize(serializeStream, SerDeUtils.getJSONString(
+              f, foi),
               PrimitiveObjectInspectorFactory.javaStringObjectInspector,
               serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
-              serdeParams.isEscaped(), serdeParams.getEscapeChar(),
-              serdeParams.getNeedsEscape());
+              serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
+                  .getNeedsEscape());
         } else {
-          LazySimpleSerDe.serialize(serializeStream, f, foi, 
-              serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
-              serdeParams.isEscaped(), serdeParams.getEscapeChar(),
-              serdeParams.getNeedsEscape());
+          LazySimpleSerDe.serialize(serializeStream, f, foi, serdeParams
+              .getSeparators(), 1, serdeParams.getNullSequence(), serdeParams
+              .isEscaped(), serdeParams.getEscapeChar(), serdeParams
+              .getNeedsEscape());
         }
 
         field[i].set(serializeStream.getData(), count, serializeStream

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java Thu Jan 21 09:52:44 2010
@@ -20,9 +20,10 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.LazyObject;
@@ -31,8 +32,6 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.Text;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 
 /**
  * ColumnarStruct is different from LazyStruct in that ColumnarStruct's field
@@ -47,11 +46,11 @@
    * The fields of the struct.
    */
   LazyObject[] fields;
-  
+
   private static final Log LOG = LogFactory.getLog(ColumnarStruct.class);
-  
-  int[]   prjColIDs   = null;   // list of projected column IDs
-  
+
+  int[] prjColIDs = null; // list of projected column IDs
+
   /**
    * Construct a ColumnarStruct object with the TypeInfo. It creates the first
    * level object at the first place
@@ -62,7 +61,7 @@
   public ColumnarStruct(ObjectInspector oi) {
     this(oi, null);
   }
-  
+
   /**
    * Construct a ColumnarStruct object with the TypeInfo. It creates the first
    * level object at the first place
@@ -70,44 +69,50 @@
    * @param oi
    *          the ObjectInspector representing the type of this LazyStruct.
    * @param notSkippedColumnIDs
-   * 		  the column ids that should not be skipped       
+   *          the column ids that should not be skipped
    */
-  public ColumnarStruct(ObjectInspector oi, ArrayList<Integer> notSkippedColumnIDs) {
-    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+  public ColumnarStruct(ObjectInspector oi,
+      ArrayList<Integer> notSkippedColumnIDs) {
+    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
+        .getAllStructFieldRefs();
     int num = fieldRefs.size();
     fields = new LazyObject[num];
     cachedByteArrayRef = new ByteArrayRef[num];
     rawBytesField = new BytesRefWritable[num];
     fieldSkipped = new boolean[num];
     inited = new boolean[num];
-    
-    //if no columns is set to be skipped, add all columns in 'notSkippedColumnIDs'
-		if (notSkippedColumnIDs == null || notSkippedColumnIDs.size() == 0) {
-			for (int i = 0; i < num; i++)
-				notSkippedColumnIDs.add(i);
-		}
-    
+
+    // if no columns is set to be skipped, add all columns in
+    // 'notSkippedColumnIDs'
+    if (notSkippedColumnIDs == null || notSkippedColumnIDs.size() == 0) {
+      for (int i = 0; i < num; i++) {
+        notSkippedColumnIDs.add(i);
+      }
+    }
+
     for (int i = 0; i < num; i++) {
-      fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
+      fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i)
+          .getFieldObjectInspector());
       cachedByteArrayRef[i] = new ByteArrayRef();
-      if(!notSkippedColumnIDs.contains(i)){
-      	fieldSkipped[i] = true;
-      	inited[i] = true;
-      } else
-      	inited[i] = false;
-    }
-    
-		// maintain a list of non-NULL column IDs
-		int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
-		    .size();
-		prjColIDs = new int[min];
-		for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
-			int readCol = notSkippedColumnIDs.get(i).intValue();
-			if (readCol < num) {
-				prjColIDs[index] = readCol;
-				index++;
-			}
-		}
+      if (!notSkippedColumnIDs.contains(i)) {
+        fieldSkipped[i] = true;
+        inited[i] = true;
+      } else {
+        inited[i] = false;
+      }
+    }
+
+    // maintain a list of non-NULL column IDs
+    int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
+        .size();
+    prjColIDs = new int[min];
+    for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
+      int readCol = notSkippedColumnIDs.get(i).intValue();
+      if (readCol < num) {
+        prjColIDs[index] = readCol;
+        index++;
+      }
+    }
   }
 
   /**
@@ -152,8 +157,9 @@
    * @return The value of the field
    */
   protected Object uncheckedGetField(int fieldID, Text nullSequence) {
-    if (fieldSkipped[fieldID])
+    if (fieldSkipped[fieldID]) {
       return null;
+    }
     if (!inited[fieldID]) {
       BytesRefWritable passedInField = rawBytesField[fieldID];
       try {
@@ -165,35 +171,36 @@
           .getStart(), passedInField.getLength());
       inited[fieldID] = true;
     }
-    
+
     byte[] data = cachedByteArrayRef[fieldID].getData();
     int fieldLen = rawBytesField[fieldID].length;
-    
+
     if (fieldLen == nullSequence.getLength()
-        && LazyUtils.compare(data, rawBytesField[fieldID].getStart(),
-            fieldLen, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
+        && LazyUtils.compare(data, rawBytesField[fieldID].getStart(), fieldLen,
+            nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
       return null;
     }
 
     return fields[fieldID].getObject();
   }
-  
-  /*  ============================  [PERF] ===================================
-   *  This function is called for every row. Setting up the selected/projected 
-   *  columns at the first call, and don't do that for the following calls. 
-   *  Ideally this should be done in the constructor where we don't need to 
-   *  branch in the function for each row. 
-   *  =========================================================================
+
+  /*
+   * ============================ [PERF] ===================================
+   * This function is called for every row. Setting up the selected/projected
+   * columns at the first call, and don't do that for the following calls.
+   * Ideally this should be done in the constructor where we don't need to
+   * branch in the function for each row.
+   * =========================================================================
    */
   public void init(BytesRefArrayWritable cols) {
-    for (int i = 0; i < prjColIDs.length; ++i ) {
+    for (int i = 0; i < prjColIDs.length; ++i) {
       int fieldIndex = prjColIDs[i];
-      if(fieldIndex < cols.size()){
-      	rawBytesField[fieldIndex] = cols.unCheckedGet(fieldIndex);
-        inited[fieldIndex] = false;      	
+      if (fieldIndex < cols.size()) {
+        rawBytesField[fieldIndex] = cols.unCheckedGet(fieldIndex);
+        inited[fieldIndex] = false;
       } else {
-      	// select columns that actually do not exist in the file.
-      	fieldSkipped[fieldIndex] = true;
+        // select columns that actually do not exist in the file.
+        fieldSkipped[fieldIndex] = true;
       }
     }
   }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyDecompressionCallback.java Thu Jan 21 09:52:44 2010
@@ -20,9 +20,8 @@
 
 import java.io.IOException;
 
-
 /**
- * Used to call back lazy decompression process. 
+ * Used to call back lazy decompression process.
  * 
  * @see #BytesRefWritable
  */

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ByteWritable.java Thu Jan 21 09:52:44 2010
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.io;
 
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
@@ -47,33 +46,36 @@
   public void set(byte value) {
     this.value = value;
   }
-  
+
   public byte get() {
     return value;
   }
-  
+
   /** Compares two ByteWritables. */
   public int compareTo(Object o) {
-    int thisValue = this.value;
+    int thisValue = value;
     int thatValue = ((ByteWritable) o).value;
     return thisValue - thatValue;
   }
 
+  @Override
   public boolean equals(Object o) {
     if (o == null || o.getClass() != ByteWritable.class) {
       return false;
     }
-    return get() == ((ByteWritable)o).get();
+    return get() == ((ByteWritable) o).get();
   }
-  
+
+  @Override
   public int hashCode() {
     return value;
   }
-  
+
+  @Override
   public String toString() {
     return String.valueOf(get());
   }
-  
+
   /** A Comparator optimized for BytesWritable. */
   public static class Comparator extends WritableComparator {
     public Comparator() {
@@ -83,6 +85,7 @@
     /**
      * Compare the buffers in serialized form.
      */
+    @Override
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
       int a1 = b1[s1];
       int a2 = b2[s2];

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java Thu Jan 21 09:52:44 2010
@@ -52,30 +52,37 @@
     out.writeDouble(value);
   }
 
-  public void set(double value) { this.value = value; }
+  public void set(double value) {
+    this.value = value;
+  }
 
-  public double get() { return value; }
+  public double get() {
+    return value;
+  }
 
   /**
    * Returns true iff <code>o</code> is a DoubleWritable with the same value.
    */
+  @Override
   public boolean equals(Object o) {
     if (!(o instanceof DoubleWritable)) {
       return false;
     }
-    DoubleWritable other = (DoubleWritable)o;
-    return this.value == other.value;
+    DoubleWritable other = (DoubleWritable) o;
+    return value == other.value;
   }
 
+  @Override
   public int hashCode() {
-    return (int)Double.doubleToLongBits(value);
+    return (int) Double.doubleToLongBits(value);
   }
 
   public int compareTo(Object o) {
-    DoubleWritable other = (DoubleWritable)o;
+    DoubleWritable other = (DoubleWritable) o;
     return (value < other.value ? -1 : (value == other.value ? 0 : 1));
   }
 
+  @Override
   public String toString() {
     return Double.toString(value);
   }
@@ -86,18 +93,16 @@
       super(DoubleWritable.class);
     }
 
-    public int compare(byte[] b1, int s1, int l1,
-                       byte[] b2, int s2, int l2) {
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
       double thisValue = readDouble(b1, s1);
       double thatValue = readDouble(b2, s2);
       return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
     }
   }
 
-  static {                                        // register this comparator
+  static { // register this comparator
     WritableComparator.define(DoubleWritable.class, new Comparator());
   }
 
 }
-
-      

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/ShortWritable.java Thu Jan 21 09:52:44 2010
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.io;
 
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
@@ -47,28 +46,31 @@
   public void set(short value) {
     this.value = value;
   }
-  
+
   public short get() {
     return value;
   }
-  
+
+  @Override
   public boolean equals(Object o) {
     if (o == null || o.getClass() != ShortWritable.class) {
       return false;
     }
-    return get() == ((ShortWritable)o).get();
+    return get() == ((ShortWritable) o).get();
   }
 
+  @Override
   public int hashCode() {
     return value;
   }
-  
+
+  @Override
   public String toString() {
     return String.valueOf(get());
   }
-  
+
   public int compareTo(Object o) {
-    int thisValue = this.value;
+    int thisValue = value;
     int thatValue = ((ShortWritable) o).value;
     return thisValue - thatValue;
   }
@@ -82,9 +84,10 @@
     /**
      * Compare the buffers in serialized form.
      */
+    @Override
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-      int a1 = (short)(readUnsignedShort(b1, s1));
-      int a2 = (short)(readUnsignedShort(b2, s2));
+      int a1 = (short) (readUnsignedShort(b1, s1));
+      int a2 = (short) (readUnsignedShort(b2, s2));
       return a1 - a2;
     }
   }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java Thu Jan 21 09:52:44 2010
@@ -18,21 +18,24 @@
 
 package org.apache.hadoop.hive.serde2.thrift;
 
-import org.apache.hadoop.conf.Configuration;
 import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
 import org.apache.thrift.TException;
 
 /**
- * An interface for TProtocols that need to have properties passed in to 
- *  initialize them. e.g., separators for TCTLSeparatedProtocol.
- *  If there was a regex like deserializer, the regex could be passed in
- *  in this manner.
+ * An interface for TProtocols that need to have properties passed in to
+ * initialize them. e.g., separators for TCTLSeparatedProtocol. If there was a
+ * regex like deserializer, the regex could be passed in in this manner.
  */
 public interface ConfigurableTProtocol {
   /**
    * Initialize the TProtocol
-   * @param conf System properties
-   * @param tbl  table properties
+   * 
+   * @param conf
+   *          System properties
+   * @param tbl
+   *          table properties
    * @throws TException
    */
   public void initialize(Configuration conf, Properties tbl) throws TException;

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java Thu Jan 21 09:52:44 2010
@@ -16,61 +16,65 @@
  * limitations under the License.
  */
 
-
 package org.apache.hadoop.hive.serde2.thrift;
 
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.Properties;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
-import org.apache.thrift.TException;
-import org.apache.thrift.transport.*;
-import org.apache.thrift.protocol.*;
-import java.util.*;
-import java.io.*;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.io.Text;
-
-import java.util.Properties;
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TMessage;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TProtocolFactory;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TStruct;
+import org.apache.thrift.transport.TTransport;
 
 /**
  * An implementation of the Thrift Protocol for binary sortable records.
  * 
- * The data format:
- * NULL:  a single byte \0
- * NON-NULL Primitives: ALWAYS prepend a single byte \1, and then:
- *   Boolean: FALSE = \1, TRUE = \2
- *   Byte:    flip the sign-bit to make sure negative comes before positive
- *   Short:   flip the sign-bit to make sure negative comes before positive
- *   Int:     flip the sign-bit to make sure negative comes before positive
- *   Long:    flip the sign-bit to make sure negative comes before positive
- *   Double:  flip the sign-bit for positive double, and all bits for negative double values
- *   String:  NULL-terminated UTF-8 string, with NULL escaped to \1 \1, and \1 escaped to \1 \2
- * NON-NULL Complex Types:
- *   Struct:  first the single byte \1, and then one field by one field.
- *   List:    size stored as Int (see above), then one element by one element. 
- *   Map:     size stored as Int (see above), then one key by one value, and then the next pair and so on.
- *   Binary:  size stored as Int (see above), then the binary data in its original form
- *   
- * Note that the relative order of list/map/binary will be based on the size first (and elements one by one if 
- * the sizes are equal). 
+ * The data format: NULL: a single byte \0 NON-NULL Primitives: ALWAYS prepend a
+ * single byte \1, and then: Boolean: FALSE = \1, TRUE = \2 Byte: flip the
+ * sign-bit to make sure negative comes before positive Short: flip the sign-bit
+ * to make sure negative comes before positive Int: flip the sign-bit to make
+ * sure negative comes before positive Long: flip the sign-bit to make sure
+ * negative comes before positive Double: flip the sign-bit for positive double,
+ * and all bits for negative double values String: NULL-terminated UTF-8 string,
+ * with NULL escaped to \1 \1, and \1 escaped to \1 \2 NON-NULL Complex Types:
+ * Struct: first the single byte \1, and then one field by one field. List: size
+ * stored as Int (see above), then one element by one element. Map: size stored
+ * as Int (see above), then one key by one value, and then the next pair and so
+ * on. Binary: size stored as Int (see above), then the binary data in its
+ * original form
  * 
- * This protocol takes an additional parameter SERIALIZATION_SORT_ORDER which is a string containing only "+" and "-".
- * The length of the string should equal to the number of fields in the top-level struct for serialization.
- * "+" means the field should be sorted ascendingly, and "-" means descendingly. The sub fields in the same top-level
- * field will have the same sort order. 
+ * Note that the relative order of list/map/binary will be based on the size
+ * first (and elements one by one if the sizes are equal).
  * 
- * This is not thrift compliant in that it doesn't write out field ids
- * so things cannot actually be versioned.
+ * This protocol takes an additional parameter SERIALIZATION_SORT_ORDER which is
+ * a string containing only "+" and "-". The length of the string should equal
+ * to the number of fields in the top-level struct for serialization. "+" means
+ * the field should be sorted ascendingly, and "-" means descendingly. The sub
+ * fields in the same top-level field will have the same sort order.
+ * 
+ * This is not thrift compliant in that it doesn't write out field ids so things
+ * cannot actually be versioned.
  */
-public class TBinarySortableProtocol extends TProtocol implements ConfigurableTProtocol,
-    WriteNullsProtocol, WriteTextProtocol {
+public class TBinarySortableProtocol extends TProtocol implements
+    ConfigurableTProtocol, WriteNullsProtocol, WriteTextProtocol {
+
+  final static Log LOG = LogFactory.getLog(TBinarySortableProtocol.class
+      .getName());
 
-  final static Log LOG = LogFactory.getLog(TBinarySortableProtocol.class.getName());
+  static byte ORDERED_TYPE = (byte) -1;
 
-  static byte ORDERED_TYPE = (byte)-1;
-  
   /**
    * Factory for TBinarySortableProtocol objects
    */
@@ -87,75 +91,89 @@
   }
 
   /**
-   * The stack level of the current field.  Top-level fields have a stackLevel value of 1.
-   * Each nested struct/list/map will increase the stackLevel value by 1.
+   * The stack level of the current field. Top-level fields have a stackLevel
+   * value of 1. Each nested struct/list/map will increase the stackLevel value
+   * by 1.
    */
   int stackLevel;
   /**
-   * The field ID in the top level struct.  This is used to determine whether this field
-   * should be sorted ascendingly or descendingly.  
+   * The field ID in the top level struct. This is used to determine whether
+   * this field should be sorted ascendingly or descendingly.
    */
   int topLevelStructFieldID;
   /**
-   * A string that consists of only "+" and "-".  It should have the same length as the number
-   * of fields in the top level struct. "+" means the corresponding field is sorted ascendingly
-   * and "-" means the corresponding field is sorted descendingly.
+   * A string that consists of only "+" and "-". It should have the same length
+   * as the number of fields in the top level struct. "+" means the
+   * corresponding field is sorted ascendingly and "-" means the corresponding
+   * field is sorted descendingly.
    */
   String sortOrder;
   /**
-   * Whether the current field is sorted ascendingly.  Always equals to
+   * Whether the current field is sorted ascendingly. Always equals to
    * sortOrder.charAt(topLevelStructFieldID) != '-'
    */
-  boolean ascending; 
-  
+  boolean ascending;
+
   public void initialize(Configuration conf, Properties tbl) throws TException {
     sortOrder = tbl.getProperty(Constants.SERIALIZATION_SORT_ORDER);
     if (sortOrder == null) {
       sortOrder = "";
     }
-    for(int i=0; i<sortOrder.length(); i++) {
-      char c = sortOrder.charAt(i); 
+    for (int i = 0; i < sortOrder.length(); i++) {
+      char c = sortOrder.charAt(i);
       if (c != '+' && c != '-') {
-        throw new TException(Constants.SERIALIZATION_SORT_ORDER + " should be a string consists of only '+' and '-'!");
+        throw new TException(Constants.SERIALIZATION_SORT_ORDER
+            + " should be a string consists of only '+' and '-'!");
       }
     }
     LOG.info("Sort order is \"" + sortOrder + "\"");
   }
 
+  @Override
   public void writeMessageBegin(TMessage message) throws TException {
   }
 
+  @Override
   public void writeMessageEnd() throws TException {
   }
 
+  @Override
   public void writeStructBegin(TStruct struct) throws TException {
     stackLevel++;
     if (stackLevel == 1) {
       topLevelStructFieldID = 0;
-      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-'); 
+      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+          .charAt(topLevelStructFieldID) != '-');
     } else {
       writeRawBytes(nonNullByte, 0, 1);
-      // If the struct is null and level > 1, DynamicSerDe will call writeNull();
-    }    
+      // If the struct is null and level > 1, DynamicSerDe will call
+      // writeNull();
+    }
   }
 
+  @Override
   public void writeStructEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
 
+  @Override
   public void writeFieldBegin(TField field) throws TException {
   }
 
+  @Override
   public void writeFieldEnd() throws TException {
     if (stackLevel == 1) {
-      topLevelStructFieldID ++;
-      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-'); 
+      topLevelStructFieldID++;
+      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+          .charAt(topLevelStructFieldID) != '-');
     }
   }
 
+  @Override
   public void writeFieldStop() {
   }
 
+  @Override
   public void writeMapBegin(TMap map) throws TException {
     stackLevel++;
     if (map == null) {
@@ -165,10 +183,12 @@
     }
   }
 
+  @Override
   public void writeMapEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
 
+  @Override
   public void writeListBegin(TList list) throws TException {
     stackLevel++;
     if (list == null) {
@@ -178,10 +198,12 @@
     }
   }
 
+  @Override
   public void writeListEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
-  
+
+  @Override
   public void writeSetBegin(TSet set) throws TException {
     stackLevel++;
     if (set == null) {
@@ -191,14 +213,17 @@
     }
   }
 
+  @Override
   public void writeSetEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
 
   byte[] rawBytesBuffer;
+
   // This method takes care of bit-flipping for descending order
   // Declare this method as final for performance reasons
-  final private void writeRawBytes(byte[] bytes, int begin, int length) throws TException {
+  final private void writeRawBytes(byte[] bytes, int begin, int length)
+      throws TException {
     if (ascending) {
       trans_.write(bytes, begin, length);
     } else {
@@ -206,97 +231,109 @@
       if (rawBytesBuffer == null || rawBytesBuffer.length < bytes.length) {
         rawBytesBuffer = new byte[bytes.length];
       }
-      for(int i=begin; i<begin+length; i++) {
-        rawBytesBuffer[i] = (byte)(~bytes[i]);
+      for (int i = begin; i < begin + length; i++) {
+        rawBytesBuffer[i] = (byte) (~bytes[i]);
       }
       trans_.write(rawBytesBuffer, begin, length);
     }
   }
-  
-  private byte [] bout = new byte[1];
+
+  private final byte[] bout = new byte[1];
+
+  @Override
   public void writeBool(boolean b) throws TException {
-    bout[0] = (b ? (byte)2 : (byte)1); 
+    bout[0] = (b ? (byte) 2 : (byte) 1);
     writeRawBytes(bout, 0, 1);
   }
 
+  @Override
   public void writeByte(byte b) throws TException {
     writeRawBytes(nonNullByte, 0, 1);
-    // Make sure negative numbers comes before positive numbers 
-    bout[0] = (byte) (b ^ 0x80); 
+    // Make sure negative numbers come before positive numbers
+    bout[0] = (byte) (b ^ 0x80);
     writeRawBytes(bout, 0, 1);
   }
 
-  private byte[] i16out = new byte[2];
+  private final byte[] i16out = new byte[2];
+
+  @Override
   public void writeI16(short i16) throws TException {
-    i16out[0] = (byte)(0xff & ((i16 >> 8) ^ 0x80));
-    i16out[1] = (byte)(0xff & (i16));
+    i16out[0] = (byte) (0xff & ((i16 >> 8) ^ 0x80));
+    i16out[1] = (byte) (0xff & (i16));
     writeRawBytes(nonNullByte, 0, 1);
     writeRawBytes(i16out, 0, 2);
   }
 
-  private byte[] i32out = new byte[4];
+  private final byte[] i32out = new byte[4];
+
+  @Override
   public void writeI32(int i32) throws TException {
-    i32out[0] = (byte)(0xff & ((i32 >> 24) ^ 0x80));
-    i32out[1] = (byte)(0xff & (i32 >> 16));
-    i32out[2] = (byte)(0xff & (i32 >> 8));
-    i32out[3] = (byte)(0xff & (i32));
+    i32out[0] = (byte) (0xff & ((i32 >> 24) ^ 0x80));
+    i32out[1] = (byte) (0xff & (i32 >> 16));
+    i32out[2] = (byte) (0xff & (i32 >> 8));
+    i32out[3] = (byte) (0xff & (i32));
     writeRawBytes(nonNullByte, 0, 1);
     writeRawBytes(i32out, 0, 4);
   }
 
-  private byte[] i64out = new byte[8];
+  private final byte[] i64out = new byte[8];
+
+  @Override
   public void writeI64(long i64) throws TException {
-    i64out[0] = (byte)(0xff & ((i64 >> 56) ^ 0x80));
-    i64out[1] = (byte)(0xff & (i64 >> 48));
-    i64out[2] = (byte)(0xff & (i64 >> 40));
-    i64out[3] = (byte)(0xff & (i64 >> 32));
-    i64out[4] = (byte)(0xff & (i64 >> 24));
-    i64out[5] = (byte)(0xff & (i64 >> 16));
-    i64out[6] = (byte)(0xff & (i64 >> 8));
-    i64out[7] = (byte)(0xff & (i64));
+    i64out[0] = (byte) (0xff & ((i64 >> 56) ^ 0x80));
+    i64out[1] = (byte) (0xff & (i64 >> 48));
+    i64out[2] = (byte) (0xff & (i64 >> 40));
+    i64out[3] = (byte) (0xff & (i64 >> 32));
+    i64out[4] = (byte) (0xff & (i64 >> 24));
+    i64out[5] = (byte) (0xff & (i64 >> 16));
+    i64out[6] = (byte) (0xff & (i64 >> 8));
+    i64out[7] = (byte) (0xff & (i64));
     writeRawBytes(nonNullByte, 0, 1);
     writeRawBytes(i64out, 0, 8);
   }
 
+  @Override
   public void writeDouble(double dub) throws TException {
     long i64 = Double.doubleToLongBits(dub);
     if ((i64 & (1L << 63)) != 0) {
       // negative numbers, flip all bits
-      i64out[0] = (byte)(0xff & ((i64 >> 56) ^ 0xff));
-      i64out[1] = (byte)(0xff & ((i64 >> 48) ^ 0xff));
-      i64out[2] = (byte)(0xff & ((i64 >> 40) ^ 0xff));
-      i64out[3] = (byte)(0xff & ((i64 >> 32) ^ 0xff));
-      i64out[4] = (byte)(0xff & ((i64 >> 24) ^ 0xff));
-      i64out[5] = (byte)(0xff & ((i64 >> 16) ^ 0xff));
-      i64out[6] = (byte)(0xff & ((i64 >> 8) ^ 0xff));
-      i64out[7] = (byte)(0xff & ((i64) ^ 0xff));
+      i64out[0] = (byte) (0xff & ((i64 >> 56) ^ 0xff));
+      i64out[1] = (byte) (0xff & ((i64 >> 48) ^ 0xff));
+      i64out[2] = (byte) (0xff & ((i64 >> 40) ^ 0xff));
+      i64out[3] = (byte) (0xff & ((i64 >> 32) ^ 0xff));
+      i64out[4] = (byte) (0xff & ((i64 >> 24) ^ 0xff));
+      i64out[5] = (byte) (0xff & ((i64 >> 16) ^ 0xff));
+      i64out[6] = (byte) (0xff & ((i64 >> 8) ^ 0xff));
+      i64out[7] = (byte) (0xff & ((i64) ^ 0xff));
     } else {
       // positive numbers, flip just the first bit
-      i64out[0] = (byte)(0xff & ((i64 >> 56) ^ 0x80));
-      i64out[1] = (byte)(0xff & (i64 >> 48));
-      i64out[2] = (byte)(0xff & (i64 >> 40));
-      i64out[3] = (byte)(0xff & (i64 >> 32));
-      i64out[4] = (byte)(0xff & (i64 >> 24));
-      i64out[5] = (byte)(0xff & (i64 >> 16));
-      i64out[6] = (byte)(0xff & (i64 >> 8));
-      i64out[7] = (byte)(0xff & (i64));
+      i64out[0] = (byte) (0xff & ((i64 >> 56) ^ 0x80));
+      i64out[1] = (byte) (0xff & (i64 >> 48));
+      i64out[2] = (byte) (0xff & (i64 >> 40));
+      i64out[3] = (byte) (0xff & (i64 >> 32));
+      i64out[4] = (byte) (0xff & (i64 >> 24));
+      i64out[5] = (byte) (0xff & (i64 >> 16));
+      i64out[6] = (byte) (0xff & (i64 >> 8));
+      i64out[7] = (byte) (0xff & (i64));
     }
     writeRawBytes(nonNullByte, 0, 1);
     writeRawBytes(i64out, 0, 8);
   }
 
-  final protected byte[] nullByte = new byte[] {0};
-  final protected byte[] nonNullByte = new byte[] {1};
+  final protected byte[] nullByte = new byte[] { 0 };
+  final protected byte[] nonNullByte = new byte[] { 1 };
   /**
-   * The escaped byte sequence for the null byte.
-   * This cannot be changed alone without changing the readString() code.
+   * The escaped byte sequence for the null byte. This cannot be changed alone
+   * without changing the readString() code.
    */
-  final protected byte[] escapedNull = new byte[] {1,1};
+  final protected byte[] escapedNull = new byte[] { 1, 1 };
   /**
-   * The escaped byte sequence for the "\1" byte.
-   * This cannot be changed alone without changing the readString() code.
+   * The escaped byte sequence for the "\1" byte. This cannot be changed alone
+   * without changing the readString() code.
    */
-  final protected byte[] escapedOne = new byte[] {1,2};
+  final protected byte[] escapedOne = new byte[] { 1, 2 };
+
+  @Override
   public void writeString(String str) throws TException {
     byte[] dat;
     try {
@@ -307,6 +344,7 @@
     writeTextBytes(dat, 0, dat.length);
   }
 
+  @Override
   public void writeBinary(byte[] bin) throws TException {
     if (bin == null) {
       writeRawBytes(nullByte, 0, 1);
@@ -316,52 +354,69 @@
     }
   }
 
+  @Override
   public TMessage readMessageBegin() throws TException {
-    return new TMessage(); 
+    return new TMessage();
   }
 
+  @Override
   public void readMessageEnd() throws TException {
   }
 
   TStruct tstruct = new TStruct();
+
+  @Override
   public TStruct readStructBegin() throws TException {
     stackLevel++;
     if (stackLevel == 1) {
       topLevelStructFieldID = 0;
-      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-'); 
+      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+          .charAt(topLevelStructFieldID) != '-');
     } else {
       // is this a null?
-      // only read the is-null byte for level > 1 because the top-level struct can never be null.
-      if (readIsNull()) return null;
+      // only read the is-null byte for level > 1 because the top-level struct
+      // can never be null.
+      if (readIsNull()) {
+        return null;
+      }
     }
     return tstruct;
   }
 
+  @Override
   public void readStructEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
 
   TField f = null;
+
+  @Override
   public TField readFieldBegin() throws TException {
-    // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered.
-    f = new TField("", ORDERED_TYPE, (short)-1);
-    return  f;
+    // slight hack to communicate to DynamicSerDe that the field ids are not
+    // being set but things are ordered.
+    f = new TField("", ORDERED_TYPE, (short) -1);
+    return f;
   }
 
+  @Override
   public void readFieldEnd() throws TException {
     if (stackLevel == 1) {
-      topLevelStructFieldID ++;
-      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder.charAt(topLevelStructFieldID) != '-'); 
+      topLevelStructFieldID++;
+      ascending = (topLevelStructFieldID >= sortOrder.length() || sortOrder
+          .charAt(topLevelStructFieldID) != '-');
     }
   }
 
   private TMap tmap = null;
+
   /**
-   * This method always return the same instance of TMap to avoid creating new instances.
-   * It is the responsibility of the caller to read the value before calling this method again.
+   * This method always return the same instance of TMap to avoid creating new
+   * instances. It is the responsibility of the caller to read the value before
+   * calling this method again.
    */
+  @Override
   public TMap readMapBegin() throws TException {
-    stackLevel ++;
+    stackLevel++;
     tmap = new TMap(ORDERED_TYPE, ORDERED_TYPE, readI32());
     if (tmap.size == 0 && lastPrimitiveWasNull()) {
       return null;
@@ -369,143 +424,162 @@
     return tmap;
   }
 
+  @Override
   public void readMapEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
 
   private TList tlist = null;
+
   /**
-   * This method always return the same instance of TList to avoid creating new instances.
-   * It is the responsibility of the caller to read the value before calling this method again.
+   * This method always return the same instance of TList to avoid creating new
+   * instances. It is the responsibility of the caller to read the value before
+   * calling this method again.
    */
+  @Override
   public TList readListBegin() throws TException {
-    stackLevel ++;
+    stackLevel++;
     tlist = new TList(ORDERED_TYPE, readI32());
     if (tlist.size == 0 && lastPrimitiveWasNull()) {
       return null;
-    }    
+    }
     return tlist;
   }
 
+  @Override
   public void readListEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
 
   private TSet set = null;
+
   /**
-   * This method always return the same instance of TSet to avoid creating new instances.
-   * It is the responsibility of the caller to read the value before calling this method again.
+   * This method always return the same instance of TSet to avoid creating new
+   * instances. It is the responsibility of the caller to read the value before
+   * calling this method again.
    */
+  @Override
   public TSet readSetBegin() throws TException {
-    stackLevel ++;
+    stackLevel++;
     set = new TSet(ORDERED_TYPE, readI32());
     if (set.size == 0 && lastPrimitiveWasNull()) {
       return null;
-    }    
+    }
     return set;
   }
 
+  @Override
   public void readSetEnd() throws TException {
-    stackLevel --;
+    stackLevel--;
   }
-  
+
   // This method takes care of bit-flipping for descending order
-  // Make this method final to improve performance. 
+  // Make this method final to improve performance.
   final private int readRawAll(byte[] buf, int off, int len) throws TException {
     int bytes = trans_.readAll(buf, off, len);
     if (!ascending) {
-      for(int i=off; i<off+bytes; i++) {
-        buf[i] = (byte)~buf[i];
+      for (int i = off; i < off + bytes; i++) {
+        buf[i] = (byte) ~buf[i];
       }
     }
     return bytes;
   }
 
+  @Override
   public boolean readBool() throws TException {
     readRawAll(bin, 0, 1);
     lastPrimitiveWasNull = (bin[0] == 0);
     return lastPrimitiveWasNull ? false : bin[0] == 2;
   }
 
-  private byte[] wasNull = new byte[1];
-  final public boolean readIsNull() throws TException { 
+  private final byte[] wasNull = new byte[1];
+
+  final public boolean readIsNull() throws TException {
     readRawAll(wasNull, 0, 1);
     lastPrimitiveWasNull = (wasNull[0] == 0);
     return lastPrimitiveWasNull;
   }
-  
-  private byte[] bin = new byte[1];
+
+  private final byte[] bin = new byte[1];
+
+  @Override
   public byte readByte() throws TException {
-    if (readIsNull()) return 0;
+    if (readIsNull()) {
+      return 0;
+    }
     readRawAll(bin, 0, 1);
-    return (byte)(bin[0] ^ 0x80);
+    return (byte) (bin[0] ^ 0x80);
   }
 
-  private byte[] i16rd = new byte[2];
+  private final byte[] i16rd = new byte[2];
+
+  @Override
   public short readI16() throws TException {
-    if (readIsNull()) return 0;
+    if (readIsNull()) {
+      return 0;
+    }
     readRawAll(i16rd, 0, 2);
-    return
-      (short)
-      ((((i16rd[0]^0x80) & 0xff) << 8) |
-       ((i16rd[1] & 0xff)));
+    return (short) ((((i16rd[0] ^ 0x80) & 0xff) << 8) | ((i16rd[1] & 0xff)));
   }
 
-  private byte[] i32rd = new byte[4];
+  private final byte[] i32rd = new byte[4];
+
+  @Override
   public int readI32() throws TException {
-    if (readIsNull()) return 0;
+    if (readIsNull()) {
+      return 0;
+    }
     readRawAll(i32rd, 0, 4);
-    return
-      (((i32rd[0]^0x80) & 0xff) << 24) |
-      ((i32rd[1] & 0xff) << 16) |
-      ((i32rd[2] & 0xff) <<  8) |
-      ((i32rd[3] & 0xff));
+    return (((i32rd[0] ^ 0x80) & 0xff) << 24) | ((i32rd[1] & 0xff) << 16)
+        | ((i32rd[2] & 0xff) << 8) | ((i32rd[3] & 0xff));
   }
 
-  private byte[] i64rd = new byte[8];
+  private final byte[] i64rd = new byte[8];
+
+  @Override
   public long readI64() throws TException {
-    if (readIsNull()) return 0;
+    if (readIsNull()) {
+      return 0;
+    }
     readRawAll(i64rd, 0, 8);
-    return
-      ((long)((i64rd[0]^0x80) & 0xff) << 56) |
-      ((long)(i64rd[1] & 0xff) << 48) |
-      ((long)(i64rd[2] & 0xff) << 40) |
-      ((long)(i64rd[3] & 0xff) << 32) |
-      ((long)(i64rd[4] & 0xff) << 24) |
-      ((long)(i64rd[5] & 0xff) << 16) |
-      ((long)(i64rd[6] & 0xff) <<  8) |
-      ((long)(i64rd[7] & 0xff));
+    return ((long) ((i64rd[0] ^ 0x80) & 0xff) << 56)
+        | ((long) (i64rd[1] & 0xff) << 48) | ((long) (i64rd[2] & 0xff) << 40)
+        | ((long) (i64rd[3] & 0xff) << 32) | ((long) (i64rd[4] & 0xff) << 24)
+        | ((long) (i64rd[5] & 0xff) << 16) | ((long) (i64rd[6] & 0xff) << 8)
+        | ((i64rd[7] & 0xff));
   }
 
+  @Override
   public double readDouble() throws TException {
-    if (readIsNull()) return 0;
+    if (readIsNull()) {
+      return 0;
+    }
     readRawAll(i64rd, 0, 8);
     long v = 0;
     if ((i64rd[0] & 0x80) != 0) {
       // Positive number
-      v = ((long)((i64rd[0]^0x80) & 0xff) << 56) |
-        ((long)(i64rd[1] & 0xff) << 48) |
-        ((long)(i64rd[2] & 0xff) << 40) |
-        ((long)(i64rd[3] & 0xff) << 32) |
-        ((long)(i64rd[4] & 0xff) << 24) |
-        ((long)(i64rd[5] & 0xff) << 16) |
-        ((long)(i64rd[6] & 0xff) <<  8) |
-        ((long)(i64rd[7] & 0xff));
+      v = ((long) ((i64rd[0] ^ 0x80) & 0xff) << 56)
+          | ((long) (i64rd[1] & 0xff) << 48) | ((long) (i64rd[2] & 0xff) << 40)
+          | ((long) (i64rd[3] & 0xff) << 32) | ((long) (i64rd[4] & 0xff) << 24)
+          | ((long) (i64rd[5] & 0xff) << 16) | ((long) (i64rd[6] & 0xff) << 8)
+          | ((i64rd[7] & 0xff));
     } else {
       // Negative number
-      v = ((long)((i64rd[0]^0xff) & 0xff) << 56) |
-        ((long)((i64rd[1]^0xff) & 0xff) << 48) |
-        ((long)((i64rd[2]^0xff) & 0xff) << 40) |
-        ((long)((i64rd[3]^0xff) & 0xff) << 32) |
-        ((long)((i64rd[4]^0xff) & 0xff) << 24) |
-        ((long)((i64rd[5]^0xff) & 0xff) << 16) |
-        ((long)((i64rd[6]^0xff) & 0xff) <<  8) |
-        ((long)((i64rd[7]^0xff) & 0xff));
+      v = ((long) ((i64rd[0] ^ 0xff) & 0xff) << 56)
+          | ((long) ((i64rd[1] ^ 0xff) & 0xff) << 48)
+          | ((long) ((i64rd[2] ^ 0xff) & 0xff) << 40)
+          | ((long) ((i64rd[3] ^ 0xff) & 0xff) << 32)
+          | ((long) ((i64rd[4] ^ 0xff) & 0xff) << 24)
+          | ((long) ((i64rd[5] ^ 0xff) & 0xff) << 16)
+          | ((long) ((i64rd[6] ^ 0xff) & 0xff) << 8)
+          | (((i64rd[7] ^ 0xff) & 0xff));
     }
     return Double.longBitsToDouble(v);
   }
 
   private byte[] stringBytes = new byte[1000];
+
+  @Override
   public String readString() throws TException {
     if (readIsNull()) {
       return null;
@@ -520,11 +594,11 @@
       if (bin[0] == 1) {
         // Escaped byte, unescape it.
         readRawAll(bin, 0, 1);
-        assert(bin[0] == 1 || bin[0] == 2);
-        bin[0] = (byte)(bin[0] - 1);
+        assert (bin[0] == 1 || bin[0] == 2);
+        bin[0] = (byte) (bin[0] - 1);
       }
       if (i == stringBytes.length) {
-        stringBytes = Arrays.copyOf(stringBytes, stringBytes.length*2);
+        stringBytes = Arrays.copyOf(stringBytes, stringBytes.length * 2);
       }
       stringBytes[i] = bin[0];
       i++;
@@ -537,15 +611,19 @@
     }
   }
 
+  @Override
   public byte[] readBinary() throws TException {
     int size = readI32();
-    if (lastPrimitiveWasNull) return null;
+    if (lastPrimitiveWasNull) {
+      return null;
+    }
     byte[] buf = new byte[size];
     readRawAll(buf, 0, size);
     return buf;
   }
 
   boolean lastPrimitiveWasNull;
+
   public boolean lastPrimitiveWasNull() throws TException {
     return lastPrimitiveWasNull;
   }
@@ -554,7 +632,6 @@
     writeRawBytes(nullByte, 0, 1);
   }
 
-  
   void writeTextBytes(byte[] bytes, int start, int length) throws TException {
     writeRawBytes(nonNullByte, 0, 1);
     int begin = 0;
@@ -563,7 +640,7 @@
       if (bytes[i] == 0 || bytes[i] == 1) {
         // Write the first part of the array
         if (i > begin) {
-          writeRawBytes(bytes, begin, i-begin);
+          writeRawBytes(bytes, begin, i - begin);
         }
         // Write the escaped byte.
         if (bytes[i] == 0) {
@@ -573,19 +650,19 @@
         }
         // Move the pointer to the next byte, since we have written
         // out the escaped byte in the block above already.
-        begin = i+1;
+        begin = i + 1;
       }
     }
     // Write the remaining part of the array
     if (i > begin) {
-      writeRawBytes(bytes, begin, i-begin);
+      writeRawBytes(bytes, begin, i - begin);
     }
     // Write the terminating NULL byte
     writeRawBytes(nullByte, 0, 1);
   }
-  
+
   public void writeText(Text text) throws TException {
     writeTextBytes(text.getBytes(), 0, text.getLength());
   }
-  
+
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Thu Jan 21 09:52:44 2010
@@ -16,42 +16,49 @@
  * limitations under the License.
  */
 
-
 package org.apache.hadoop.hive.serde2.thrift;
 
+import java.io.EOFException;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.NoSuchElementException;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.io.Text;
-
 import org.apache.thrift.TException;
-import org.apache.thrift.transport.*;
-import org.apache.thrift.*;
-import org.apache.thrift.protocol.*;
-import java.util.*;
-import java.util.regex.Pattern;
-import java.util.regex.Matcher;
-import java.io.*;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-
-import org.apache.hadoop.conf.Configuration;
-import java.util.Properties;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TMessage;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TProtocolFactory;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TStruct;
+import org.apache.thrift.protocol.TType;
+import org.apache.thrift.transport.TTransport;
+import org.apache.thrift.transport.TTransportException;
 
 /**
- *
- * An implementation of the Thrift Protocol for ctl separated
- * records.
- * This is not thrift compliant in that it doesn't write out field ids
- * so things cannot actually be versioned.
+ * 
+ * An implementation of the Thrift Protocol for ctl separated records. This is
+ * not thrift compliant in that it doesn't write out field ids so things cannot
+ * actually be versioned.
  */
-public class TCTLSeparatedProtocol extends TProtocol 
-  implements ConfigurableTProtocol, WriteNullsProtocol, SkippableTProtocol {
+public class TCTLSeparatedProtocol extends TProtocol implements
+    ConfigurableTProtocol, WriteNullsProtocol, SkippableTProtocol {
 
-  final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName());
+  final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class
+      .getName());
+
+  static byte ORDERED_TYPE = (byte) -1;
 
-  static byte ORDERED_TYPE = (byte)-1;
-  
   /**
    * Factory for JSON protocol objects
    */
@@ -83,20 +90,30 @@
   protected Pattern mapPattern;
 
   /**
-   * The quote character when supporting quotes with ability to not split across quoted entries. Like csv.
-   * Note that escaping the quote is not currently supported.
+   * The quote character when supporting quotes with ability to not split across
+   * quoted entries. Like csv. Note that escaping the quote is not currently
+   * supported.
    */
   protected String quote;
-  
 
   /**
    * Inspect the separators this instance is configured with.
    */
-  public String getPrimarySeparator() { return primarySeparator; }
-  public String getSecondarySeparator() { return secondarySeparator; }
-  public String getRowSeparator() { return rowSeparator; }
-  public String getMapSeparator() { return mapSeparator; }
+  public String getPrimarySeparator() {
+    return primarySeparator;
+  }
+
+  public String getSecondarySeparator() {
+    return secondarySeparator;
+  }
 
+  public String getRowSeparator() {
+    return rowSeparator;
+  }
+
+  public String getMapSeparator() {
+    return mapSeparator;
+  }
 
   /**
    * The transport stream is tokenized on the row separator
@@ -124,7 +141,6 @@
    */
   protected int innerIndex;
 
-
   /**
    * Is this the first field we're writing
    */
@@ -135,41 +151,38 @@
    */
   protected boolean firstInnerField;
 
-
   /**
    * Are we writing a map and need to worry about k/v separator?
    */
   protected boolean isMap;
 
-
   /**
-   * For writes, on what element are we on so we know when to use normal list separator or 
-   * for a map know when to use the k/v separator
+   * For writes, which element we are on, so we know when to use the normal
+   * list separator or, for a map, when to use the k/v separator
    */
   protected long elemIndex;
 
-
   /**
    * Are we currently on the top-level columns or parsing a column itself
    */
   protected boolean inner;
 
-
   /**
-   * For places where the separators are back to back, should we return a null or an empty string since it is ambiguous.
-   * This also applies to extra columns that are read but aren't in the current record.
+   * For places where the separators are back to back, should we return a null
+   * or an empty string since it is ambiguous. This also applies to extra
+   * columns that are read but aren't in the current record.
    */
   protected boolean returnNulls;
 
   /**
    * The transport being wrapped.
-   *
+   * 
    */
   final protected TTransport innerTransport;
 
-
   /**
-   * Strings used to lookup the various configurable paramaters of this protocol.
+   * Strings used to look up the various configurable parameters of this
+   * protocol.
    */
   public final static String ReturnNullsKey = "separators.return_nulls";
   public final static String BufferSizeKey = "separators.buffer_size";
@@ -186,10 +199,9 @@
 
   /**
    * The nullString in UTF-8 bytes
-   */ 
+   */
   protected Text nullText;
 
-
   /**
    * A convenience class for tokenizing a TTransport
    */
@@ -201,63 +213,65 @@
     final String separator;
     byte buf[];
 
-    public SimpleTransportTokenizer(TTransport trans, String separator, int buffer_length) {
+    public SimpleTransportTokenizer(TTransport trans, String separator,
+        int buffer_length) {
       this.trans = trans;
       this.separator = separator;
       buf = new byte[buffer_length];
-      // do not fill tokenizer until user requests since filling it could read in data
+      // do not fill tokenizer until user requests since filling it could read
+      // in data
       // not meant for this instantiation.
       fillTokenizer();
     }
 
     private boolean fillTokenizer() {
       try {
-          int length = trans.read(buf, 0, buf.length);
-          if(length <=0 ) {
-            tokenizer = new StringTokenizer("", separator, true);
-            return false;
-          }
-          String row;
-          try {
-            row = Text.decode(buf, 0, length);
-          } catch (CharacterCodingException e) {
-            throw new RuntimeException(e);
-          }
-          tokenizer = new StringTokenizer(row, separator, true);
-        } catch(TTransportException e) {
-          e.printStackTrace();
-          tokenizer = null;
+        int length = trans.read(buf, 0, buf.length);
+        if (length <= 0) {
+          tokenizer = new StringTokenizer("", separator, true);
           return false;
         }
-        return true;
+        String row;
+        try {
+          row = Text.decode(buf, 0, length);
+        } catch (CharacterCodingException e) {
+          throw new RuntimeException(e);
+        }
+        tokenizer = new StringTokenizer(row, separator, true);
+      } catch (TTransportException e) {
+        e.printStackTrace();
+        tokenizer = null;
+        return false;
+      }
+      return true;
     }
 
     public String nextToken() throws EOFException {
       StringBuffer ret = null;
       boolean done = false;
 
-      if(tokenizer == null) {
+      if (tokenizer == null) {
         fillTokenizer();
       }
 
-      while(! done) {
+      while (!done) {
 
-        if(! tokenizer.hasMoreTokens()) {
-          if(! fillTokenizer()) {
+        if (!tokenizer.hasMoreTokens()) {
+          if (!fillTokenizer()) {
             break;
           }
         }
         try {
           final String nextToken = tokenizer.nextToken();
 
-          if(nextToken.equals(separator)) {
+          if (nextToken.equals(separator)) {
             done = true;
-          } else if(ret == null) {
+          } else if (ret == null) {
             ret = new StringBuffer(nextToken);
           } else {
             ret.append(nextToken);
           }
-        } catch(NoSuchElementException e) {
+        } catch (NoSuchElementException e) {
           if (ret == null) {
             throw new EOFException(e.getMessage());
           }
@@ -269,34 +283,45 @@
     }
   };
 
-
   /**
-   * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and to return empty strings for empty fields.
-   *
-   * @param trans - the ttransport to use as input or output
-   *
+   * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and
+   * to return empty strings for empty fields.
+   * 
+   * @param trans
+   *          - the ttransport to use as input or output
+   * 
    */
 
   public TCTLSeparatedProtocol(TTransport trans) {
-    this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, true, 4096);
+    this(trans, defaultPrimarySeparator, defaultSecondarySeparator,
+        defaultMapSeparator, defaultRowSeparator, true, 4096);
   }
 
   public TCTLSeparatedProtocol(TTransport trans, int buffer_size) {
-    this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, true, buffer_size);
+    this(trans, defaultPrimarySeparator, defaultSecondarySeparator,
+        defaultMapSeparator, defaultRowSeparator, true, buffer_size);
   }
 
   /**
-   * @param trans - the ttransport to use as input or output
-   * @param primarySeparator the separator between columns (aka fields)
-   * @param secondarySeparator the separator within a field for things like sets and maps and lists
-   * @param mapSeparator - the key/value separator
-   * @param rowSeparator - the record separator
-   * @param returnNulls - whether to return a null or an empty string for fields that seem empty (ie two primary separators back to back)
-   */
-
-  public TCTLSeparatedProtocol(TTransport trans, String primarySeparator, String secondarySeparator, String mapSeparator, String rowSeparator, 
-                               boolean returnNulls,
-                               int bufferSize) {
+   * @param trans
+   *          - the ttransport to use as input or output
+   * @param primarySeparator
+   *          the separator between columns (aka fields)
+   * @param secondarySeparator
+   *          the separator within a field for things like sets and maps and
+   *          lists
+   * @param mapSeparator
+   *          - the key/value separator
+   * @param rowSeparator
+   *          - the record separator
+   * @param returnNulls
+   *          - whether to return a null or an empty string for fields that seem
+   *          empty (ie two primary separators back to back)
+   */
+
+  public TCTLSeparatedProtocol(TTransport trans, String primarySeparator,
+      String secondarySeparator, String mapSeparator, String rowSeparator,
+      boolean returnNulls, int bufferSize) {
     super(trans);
 
     this.returnNulls = returnNulls;
@@ -306,51 +331,52 @@
     this.rowSeparator = rowSeparator;
     this.mapSeparator = mapSeparator;
 
-    this.innerTransport = trans;
+    innerTransport = trans;
     this.bufferSize = bufferSize;
-    this.nullString  = "\\N";
+    nullString = "\\N";
   }
 
-
-
   /**
    * Sets the internal separator patterns and creates the internal tokenizer.
    */
   protected void internalInitialize() {
 
-    // in the future could allow users to specify a quote character that doesn't need escaping but for now ...
-    final String primaryPatternString = 
-      quote == null ? primarySeparator : 
-      "(?:^|" + primarySeparator + ")(" + quote + "(?:[^"  + quote + "]+|" + quote + quote + ")*" + quote + "|[^" + primarySeparator + "]*)";
+    // in the future could allow users to specify a quote character that doesn't
+    // need escaping but for now ...
+    final String primaryPatternString = quote == null ? primarySeparator
+        : "(?:^|" + primarySeparator + ")(" + quote + "(?:[^" + quote + "]+|"
+            + quote + quote + ")*" + quote + "|[^" + primarySeparator + "]*)";
 
     if (quote != null) {
       stripSeparatorPrefix = Pattern.compile("^" + primarySeparator);
       stripQuotePrefix = Pattern.compile("^" + quote);
       stripQuotePostfix = Pattern.compile(quote + "$");
-    }      
+    }
 
     primaryPattern = Pattern.compile(primaryPatternString);
     secondaryPattern = Pattern.compile(secondarySeparator);
     mapPattern = Pattern.compile(secondarySeparator + "|" + mapSeparator);
     nullText = new Text(nullString);
-    transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparator, bufferSize);
+    transportTokenizer = new SimpleTransportTokenizer(innerTransport,
+        rowSeparator, bufferSize);
   }
 
   /**
-   * For quoted fields, strip away the quotes and also need something to strip away the control separator when using
-   * complex split method defined here.
+   * For quoted fields, strip away the quotes and also need something to strip
+   * away the control separator when using complex split method defined here.
    */
   protected Pattern stripSeparatorPrefix;
   protected Pattern stripQuotePrefix;
   protected Pattern stripQuotePostfix;
 
-
-  /** 
-   *
+  /**
+   * 
    * Split the line based on a complex regex pattern
-   *
-   * @param line the current row
-   * @param p the pattern for matching fields in the row
+   * 
+   * @param line
+   *          the current row
+   * @param p
+   *          the pattern for matching fields in the row
    * @return List of Strings - not including the separator in them
    */
   protected String[] complexSplit(String line, Pattern p) {
@@ -360,104 +386,116 @@
     // For each field
     while (m.find()) {
       String match = m.group();
-      if (match == null)
+      if (match == null) {
         break;
-      if (match.length() == 0)
+      }
+      if (match.length() == 0) {
         match = null;
-      else {
-        if(stripSeparatorPrefix.matcher(match).find()) {
+      } else {
+        if (stripSeparatorPrefix.matcher(match).find()) {
           match = match.substring(1);
         }
-        if(stripQuotePrefix.matcher(match).find()) {
+        if (stripQuotePrefix.matcher(match).find()) {
           match = match.substring(1);
         }
-        if(stripQuotePostfix.matcher(match).find()) {
-          match = match.substring(0,match.length() - 1);
+        if (stripQuotePostfix.matcher(match).find()) {
+          match = match.substring(0, match.length() - 1);
         }
       }
       list.add(match);
     }
-    return (String[])list.toArray(new String[1]);
+    return list.toArray(new String[1]);
   }
 
-
-
   protected String getByteValue(String altValue, String defaultVal) {
     if (altValue != null && altValue.length() > 0) {
       try {
-        byte b [] = new byte[1];
+        byte b[] = new byte[1];
         b[0] = Byte.valueOf(altValue).byteValue();
         return new String(b);
-      } catch(NumberFormatException e) {
+      } catch (NumberFormatException e) {
         return altValue;
       }
     }
     return defaultVal;
   }
-  
 
   /**
    * Initialize the TProtocol
-   * @param conf System properties
-   * @param tbl  table properties
+   * 
+   * @param conf
+   *          System properties
+   * @param tbl
+   *          table properties
    * @throws TException
    */
   public void initialize(Configuration conf, Properties tbl) throws TException {
 
-
-    primarySeparator = getByteValue(tbl.getProperty(Constants.FIELD_DELIM), primarySeparator);
-    secondarySeparator = getByteValue(tbl.getProperty(Constants.COLLECTION_DELIM), secondarySeparator);
-    rowSeparator = getByteValue(tbl.getProperty(Constants.LINE_DELIM), rowSeparator);
-    mapSeparator = getByteValue(tbl.getProperty(Constants.MAPKEY_DELIM), mapSeparator);
-    returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue();
-    bufferSize =  Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
-    nullString  = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
-    quote  = tbl.getProperty(Constants.QUOTE_CHAR, null);
+    primarySeparator = getByteValue(tbl.getProperty(Constants.FIELD_DELIM),
+        primarySeparator);
+    secondarySeparator = getByteValue(tbl
+        .getProperty(Constants.COLLECTION_DELIM), secondarySeparator);
+    rowSeparator = getByteValue(tbl.getProperty(Constants.LINE_DELIM),
+        rowSeparator);
+    mapSeparator = getByteValue(tbl.getProperty(Constants.MAPKEY_DELIM),
+        mapSeparator);
+    returnNulls = Boolean.valueOf(
+        tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls)))
+        .booleanValue();
+    bufferSize = Integer.valueOf(
+        tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
+    nullString = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
+    quote = tbl.getProperty(Constants.QUOTE_CHAR, null);
 
     internalInitialize();
 
   }
 
+  @Override
   public void writeMessageBegin(TMessage message) throws TException {
   }
 
+  @Override
   public void writeMessageEnd() throws TException {
   }
 
+  @Override
   public void writeStructBegin(TStruct struct) throws TException {
     firstField = true;
   }
 
+  @Override
   public void writeStructEnd() throws TException {
-    // We don't write rowSeparatorByte because that should be handled by file format.
+    // We don't write rowSeparatorByte because that should be handled by file
+    // format.
   }
 
+  @Override
   public void writeFieldBegin(TField field) throws TException {
-    if(! firstField) {
+    if (!firstField) {
       internalWriteString(primarySeparator);
     }
     firstField = false;
   }
 
+  @Override
   public void writeFieldEnd() throws TException {
   }
 
+  @Override
   public void writeFieldStop() {
   }
 
+  @Override
   public void writeMapBegin(TMap map) throws TException {
     // nesting not allowed!
-    if(map.keyType == TType.STRUCT ||
-       map.keyType == TType.MAP ||
-       map.keyType == TType.LIST ||
-       map.keyType == TType.SET) {
+    if (map.keyType == TType.STRUCT || map.keyType == TType.MAP
+        || map.keyType == TType.LIST || map.keyType == TType.SET) {
       throw new TException("Not implemented: nested structures");
     }
     // nesting not allowed!
-    if(map.valueType == TType.STRUCT ||
-       map.valueType == TType.MAP ||
-       map.valueType == TType.LIST ||
-       map.valueType == TType.SET) {
+    if (map.valueType == TType.STRUCT || map.valueType == TType.MAP
+        || map.valueType == TType.LIST || map.valueType == TType.SET) {
       throw new TException("Not implemented: nested structures");
     }
 
@@ -467,71 +505,80 @@
     elemIndex = 0;
   }
 
+  @Override
   public void writeMapEnd() throws TException {
     isMap = false;
     inner = false;
   }
 
+  @Override
   public void writeListBegin(TList list) throws TException {
-    if(list.elemType == TType.STRUCT ||
-       list.elemType == TType.MAP ||
-       list.elemType == TType.LIST ||
-       list.elemType == TType.SET) {
+    if (list.elemType == TType.STRUCT || list.elemType == TType.MAP
+        || list.elemType == TType.LIST || list.elemType == TType.SET) {
       throw new TException("Not implemented: nested structures");
     }
     firstInnerField = true;
     inner = true;
   }
 
+  @Override
   public void writeListEnd() throws TException {
     inner = false;
   }
-  
+
+  @Override
   public void writeSetBegin(TSet set) throws TException {
-    if(set.elemType == TType.STRUCT ||
-       set.elemType == TType.MAP ||
-       set.elemType == TType.LIST ||
-       set.elemType == TType.SET) {
+    if (set.elemType == TType.STRUCT || set.elemType == TType.MAP
+        || set.elemType == TType.LIST || set.elemType == TType.SET) {
       throw new TException("Not implemented: nested structures");
     }
     firstInnerField = true;
     inner = true;
   }
 
+  @Override
   public void writeSetEnd() throws TException {
     inner = false;
   }
 
+  @Override
   public void writeBool(boolean b) throws TException {
     writeString(String.valueOf(b));
   }
 
   // for writing out single byte
-  private byte buf[] = new byte[1];
+  private final byte buf[] = new byte[1];
+
+  @Override
   public void writeByte(byte b) throws TException {
     buf[0] = b;
     trans_.write(buf);
   }
 
+  @Override
   public void writeI16(short i16) throws TException {
     writeString(String.valueOf(i16));
   }
 
+  @Override
   public void writeI32(int i32) throws TException {
     writeString(String.valueOf(i32));
   }
 
+  @Override
   public void writeI64(long i64) throws TException {
     writeString(String.valueOf(i64));
   }
 
+  @Override
   public void writeDouble(double dub) throws TException {
     writeString(String.valueOf(dub));
   }
 
   Text tmpText = new Text();
-  public void internalWriteString(String str) throws TException  {
-    if(str != null) {
+
+  public void internalWriteString(String str) throws TException {
+    if (str != null) {
       tmpText.set(str);
       trans_.write(tmpText.getBytes(), 0, tmpText.getLength());
     } else {
@@ -539,11 +586,13 @@
     }
   }
 
+  @Override
   public void writeString(String str) throws TException {
-    if(inner) {
-      if(!firstInnerField) {
-        // super hack city notice the mod plus only happens after firstfield hit, so == 0 is right.
-        if(isMap && elemIndex++ % 2 == 0) {
+    if (inner) {
+      if (!firstInnerField) {
+        // super hack city notice the mod plus only happens after firstfield
+        // hit, so == 0 is right.
+        if (isMap && elemIndex++ % 2 == 0) {
           internalWriteString(mapSeparator);
         } else {
           internalWriteString(secondarySeparator);
@@ -555,72 +604,79 @@
     internalWriteString(str);
   }
 
+  @Override
   public void writeBinary(byte[] bin) throws TException {
-    throw new TException("Ctl separated protocol cannot support writing Binary data!");
+    throw new TException(
+        "Ctl separated protocol cannot support writing Binary data!");
   }
 
+  @Override
   public TMessage readMessageBegin() throws TException {
-    return new TMessage(); 
+    return new TMessage();
   }
 
+  @Override
   public void readMessageEnd() throws TException {
   }
 
- public TStruct readStructBegin() throws TException {
-   assert(!inner);
-   try {
-     final String tmp = transportTokenizer.nextToken();
-     columns = quote == null ? primaryPattern.split(tmp) : complexSplit(tmp, primaryPattern);
-     index = 0;
-     return new TStruct();
-   } catch(EOFException e) {
-     return null;
-   }
+  @Override
+  public TStruct readStructBegin() throws TException {
+    assert (!inner);
+    try {
+      final String tmp = transportTokenizer.nextToken();
+      columns = quote == null ? primaryPattern.split(tmp) : complexSplit(tmp,
+          primaryPattern);
+      index = 0;
+      return new TStruct();
+    } catch (EOFException e) {
+      return null;
+    }
   }
 
+  @Override
   public void readStructEnd() throws TException {
     columns = null;
   }
 
-
   /**
-   * Skip past the current field
-   * Just increments the field index counter.
+   * Skip past the current field Just increments the field index counter.
    */
   public void skip(byte type) {
-    if( inner) {
+    if (inner) {
       innerIndex++;
     } else {
       index++;
     }
   }
 
-
+  @Override
   public TField readFieldBegin() throws TException {
-    assert( !inner);
-    TField f = new TField("", ORDERED_TYPE, (short)-1);
-    // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered.
-    return  f;
+    assert (!inner);
+    TField f = new TField("", ORDERED_TYPE, (short) -1);
+    // slight hack to communicate to DynamicSerDe that the field ids are not
+    // being set but things are ordered.
+    return f;
   }
 
+  @Override
   public void readFieldEnd() throws TException {
     fields = null;
   }
 
+  @Override
   public TMap readMapBegin() throws TException {
-    assert( !inner);
+    assert (!inner);
     TMap map = new TMap();
-    if(columns[index] == null ||
-       columns[index].equals(nullString)) {
+    if (columns[index] == null || columns[index].equals(nullString)) {
       index++;
-      if(returnNulls) {
+      if (returnNulls) {
         return null;
       }
-    } else if(columns[index].isEmpty()) {
+    } else if (columns[index].isEmpty()) {
       index++;
     } else {
       fields = mapPattern.split(columns[index++]);
-      map = new TMap(ORDERED_TYPE, ORDERED_TYPE, fields.length/2);
+      map = new TMap(ORDERED_TYPE, ORDERED_TYPE, fields.length / 2);
     }
     innerIndex = 0;
     inner = true;
@@ -628,21 +684,22 @@
     return map;
   }
 
+  @Override
   public void readMapEnd() throws TException {
     inner = false;
     isMap = false;
   }
 
+  @Override
   public TList readListBegin() throws TException {
-    assert( !inner);
+    assert (!inner);
     TList list = new TList();
-    if(columns[index] == null ||
-       columns[index].equals(nullString)) {
+    if (columns[index] == null || columns[index].equals(nullString)) {
       index++;
-      if(returnNulls) {
+      if (returnNulls) {
         return null;
       }
-    } else if(columns[index].isEmpty()) {
+    } else if (columns[index].isEmpty()) {
       index++;
     } else {
       fields = secondaryPattern.split(columns[index++]);
@@ -653,20 +710,21 @@
     return list;
   }
 
+  @Override
   public void readListEnd() throws TException {
     inner = false;
   }
 
+  @Override
   public TSet readSetBegin() throws TException {
-    assert( !inner);
+    assert (!inner);
     TSet set = new TSet();
-    if(columns[index] == null ||
-       columns[index].equals(nullString)) {
+    if (columns[index] == null || columns[index].equals(nullString)) {
       index++;
-      if(returnNulls) {
+      if (returnNulls) {
         return null;
       }
-    } else if(columns[index].isEmpty()) {
+    } else if (columns[index].isEmpty()) {
       index++;
     } else {
       fields = secondaryPattern.split(columns[index++]);
@@ -687,16 +745,20 @@
     writeString(null);
   }
 
+  @Override
   public void readSetEnd() throws TException {
     inner = false;
   }
 
+  @Override
   public boolean readBool() throws TException {
     String val = readString();
     lastPrimitiveWasNullFlag = val == null;
-    return val == null || val.isEmpty() ? false : Boolean.valueOf(val).booleanValue();
+    return val == null || val.isEmpty() ? false : Boolean.valueOf(val)
+        .booleanValue();
   }
 
+  @Override
   public byte readByte() throws TException {
     String val = readString();
     lastPrimitiveWasNullFlag = val == null;
@@ -708,6 +770,7 @@
     }
   }
 
+  @Override
   public short readI16() throws TException {
     String val = readString();
     lastPrimitiveWasNullFlag = val == null;
@@ -719,6 +782,7 @@
     }
   }
 
+  @Override
   public int readI32() throws TException {
     String val = readString();
     lastPrimitiveWasNullFlag = val == null;
@@ -730,6 +794,7 @@
     }
   }
 
+  @Override
   public long readI64() throws TException {
     String val = readString();
     lastPrimitiveWasNullFlag = val == null;
@@ -741,32 +806,38 @@
     }
   }
 
+  @Override
   public double readDouble() throws TException {
     String val = readString();
     lastPrimitiveWasNullFlag = val == null;
     try {
-      return val == null || val.isEmpty() ? 0 :Double.valueOf(val).doubleValue();
+      return val == null || val.isEmpty() ? 0 : Double.valueOf(val)
+          .doubleValue();
     } catch (NumberFormatException e) {
       lastPrimitiveWasNullFlag = true;
       return 0;
     }
   }
 
+  @Override
   public String readString() throws TException {
     String ret;
-    if(!inner) {
-      ret = columns != null && index < columns.length ? columns[index] :  null;
+    if (!inner) {
+      ret = columns != null && index < columns.length ? columns[index] : null;
       index++;
     } else {
-      ret = fields != null && innerIndex < fields.length ? fields[innerIndex] : null;
+      ret = fields != null && innerIndex < fields.length ? fields[innerIndex]
+          : null;
       innerIndex++;
     }
-    if(ret == null || ret.equals(nullString))
+    if (ret == null || ret.equals(nullString)) {
       return returnNulls ? null : "";
-    else
+    } else {
       return ret;
+    }
   }
 
+  @Override
   public byte[] readBinary() throws TException {
     throw new TException("Not implemented for control separated data");
   }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TReflectionUtils.java Thu Jan 21 09:52:44 2010
@@ -20,25 +20,23 @@
 
 import org.apache.thrift.protocol.TProtocolFactory;
 
-
 public class TReflectionUtils {
   public static final String thriftReaderFname = "read";
   public static final String thriftWriterFname = "write";
 
-  public static final Class<?> [] thriftRWParams;
+  public static final Class<?>[] thriftRWParams;
   static {
     try {
-      thriftRWParams = new Class [] {
-      Class.forName("org.apache.thrift.protocol.TProtocol")
-      };
+      thriftRWParams = new Class[] { Class
+          .forName("org.apache.thrift.protocol.TProtocol") };
     } catch (ClassNotFoundException e) {
       throw new RuntimeException(e);
     }
   }
 
   public static TProtocolFactory getProtocolFactoryByName(String protocolName)
-    throws Exception {
+      throws Exception {
     Class<?> protoClass = Class.forName(protocolName + "$Factory");
-    return ((TProtocolFactory)protoClass.newInstance());
+    return ((TProtocolFactory) protoClass.newInstance());
   }
 }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java Thu Jan 21 09:52:44 2010
@@ -26,7 +26,6 @@
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.Writable;
-
 import org.apache.thrift.TBase;
 import org.apache.thrift.protocol.TProtocol;
 import org.apache.thrift.protocol.TProtocolFactory;
@@ -37,19 +36,24 @@
   protected TIOStreamTransport outTransport, inTransport;
   protected TProtocol outProtocol, inProtocol;
 
-  private void init(TProtocolFactory inFactory, TProtocolFactory outFactory) throws Exception {
+  private void init(TProtocolFactory inFactory, TProtocolFactory outFactory)
+      throws Exception {
     outTransport = new TIOStreamTransport(bos);
     inTransport = new TIOStreamTransport(bis);
     outProtocol = outFactory.getProtocol(outTransport);
     inProtocol = inFactory.getProtocol(inTransport);
   }
 
-  public void initialize(Configuration job, Properties tbl) throws SerDeException {
-    throw new SerDeException("ThriftByteStreamTypedSerDe is still semi-abstract");
+  @Override
+  public void initialize(Configuration job, Properties tbl)
+      throws SerDeException {
+    throw new SerDeException(
+        "ThriftByteStreamTypedSerDe is still semi-abstract");
   }
 
-  public ThriftByteStreamTypedSerDe(Type objectType, TProtocolFactory inFactory,
-                                    TProtocolFactory outFactory) throws SerDeException {
+  public ThriftByteStreamTypedSerDe(Type objectType,
+      TProtocolFactory inFactory, TProtocolFactory outFactory)
+      throws SerDeException {
     super(objectType);
     try {
       init(inFactory, outFactory);
@@ -58,14 +62,16 @@
     }
   }
 
+  @Override
   protected ObjectInspectorFactory.ObjectInspectorOptions getObjectInspectorOptions() {
     return ObjectInspectorFactory.ObjectInspectorOptions.THRIFT;
   }
-  
+
+  @Override
   public Object deserialize(Writable field) throws SerDeException {
     Object obj = super.deserialize(field);
     try {
-      ((TBase)obj).read(inProtocol);
+      ((TBase) obj).read(inProtocol);
     } catch (Exception e) {
       throw new SerDeException(e);
     }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java Thu Jan 21 09:52:44 2010
@@ -31,27 +31,33 @@
 
   private ThriftByteStreamTypedSerDe tsd;
 
-  public ThriftDeserializer() { }
-  
-  public void initialize(Configuration job, Properties tbl) throws SerDeException {
+  public ThriftDeserializer() {
+  }
+
+  public void initialize(Configuration job, Properties tbl)
+      throws SerDeException {
     try {
       // both the classname and the protocol name are Table properties
       // the only hardwired assumption is that records are fixed on a
       // per Table basis
 
-      String className = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS);
+      String className = tbl
+          .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS);
       Class<?> recordClass = job.getClassByName(className);
 
-      String protoName = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
+      String protoName = tbl
+          .getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
       if (protoName == null) {
         protoName = "TBinaryProtocol";
       }
       // For backward compatibility
-      protoName = protoName.replace("com.facebook.thrift.protocol", "org.apache.thrift.protocol");
+      protoName = protoName.replace("com.facebook.thrift.protocol",
+          "org.apache.thrift.protocol");
 
-      TProtocolFactory tp = TReflectionUtils.getProtocolFactoryByName(protoName);
+      TProtocolFactory tp = TReflectionUtils
+          .getProtocolFactoryByName(protoName);
       tsd = new ThriftByteStreamTypedSerDe(recordClass, tp, tp);
-      
+
     } catch (Exception e) {
       throw new SerDeException(e);
     }
@@ -60,7 +66,7 @@
   public Object deserialize(Writable field) throws SerDeException {
     return tsd.deserialize(field);
   }
-  
+
   public ObjectInspector getObjectInspector() throws SerDeException {
     return tsd.getObjectInspector();
   }

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java?rev=901625&r1=901624&r2=901625&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java Thu Jan 21 09:52:44 2010
@@ -21,23 +21,22 @@
 import org.apache.thrift.TException;
 
 /**
- * An interface for TProtocols that actually write out nulls - 
- * This should be for all those that don't actually use
- * fieldids in the written data like TCTLSeparatedProtocol.
+ * An interface for TProtocols that actually write out nulls - This should be
+ * for all those that don't actually use fieldids in the written data like
+ * TCTLSeparatedProtocol.
  * 
  */
 public interface WriteNullsProtocol {
   /**
-   * Was the last primitive read really a NULL. Need
-   * only be called when the value of the primitive
-   * was 0. ie the protocol should return 0 on nulls
-   * and the caller will then check if it was actually null
-   * For boolean this is false.
+   * Was the last primitive read really a NULL. Need only be called when the
+   * value of the primitive was 0. ie the protocol should return 0 on nulls and
+   * the caller will then check if it was actually null For boolean this is
+   * false.
    */
   public boolean lastPrimitiveWasNull() throws TException;
 
   /**
-   * Write a null 
+   * Write a null
    */
   public void writeNull() throws TException;