You are viewing a plain text version of this content. The canonical link to the original was not preserved in this plain-text export.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/02/03 02:55:46 UTC

svn commit: r740188 - in /hadoop/hive/branches/branch-0.2: CHANGES.txt serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java

Author: zshao
Date: Tue Feb  3 01:55:46 2009
New Revision: 740188

URL: http://svn.apache.org/viewvc?rev=740188&view=rev
Log:
HIVE-263. TCTLSeparatedProtocol should use UTF-8 to encode/decode the data. (zshao)

Modified:
    hadoop/hive/branches/branch-0.2/CHANGES.txt
    hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java

Modified: hadoop/hive/branches/branch-0.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/CHANGES.txt?rev=740188&r1=740187&r2=740188&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.2/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.2/CHANGES.txt Tue Feb  3 01:55:46 2009
@@ -115,6 +115,9 @@
 
   BUG FIXES
 
+    HIVE-263. TCTLSeparatedProtocol should use UTF-8 to encode/decode
+    the data. (zshao)
+
     HIVE-255. Propagate user name to Hive metastore. (Prasad Chakka via zshao)
 
     HIVE-235. Fixed DynamicSerDe to work with null values with Thrift

Modified: hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=740188&r1=740187&r2=740188&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (original)
+++ hadoop/hive/branches/branch-0.2/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Tue Feb  3 01:55:46 2009
@@ -22,6 +22,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.io.Text;
+
 import com.facebook.thrift.TException;
 import com.facebook.thrift.transport.*;
 import com.facebook.thrift.*;
@@ -30,6 +32,9 @@
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+
 import org.apache.hadoop.conf.Configuration;
 import java.util.Properties;
 
@@ -178,9 +183,9 @@
   protected String nullString;
 
   /**
-   * The nullString in bytes
+   * The nullString in UTF-8 bytes
    */ 
-  protected byte nullBuf[];
+  protected Text nullText;
 
 
   /**
@@ -210,7 +215,12 @@
             tokenizer = new StringTokenizer("", separator, true);
             return false;
           }
-          String row = new String(buf, 0, length);
+          String row;
+          try {
+            row = Text.decode(buf, 0, length);
+          } catch (CharacterCodingException e) {
+            throw new RuntimeException(e);
+          }
           tokenizer = new StringTokenizer(row, separator, true);
         } catch(TTransportException e) {
           e.printStackTrace();
@@ -322,7 +332,7 @@
     primaryPattern = Pattern.compile(primaryPatternString);
     secondaryPattern = Pattern.compile(secondarySeparator);
     mapPattern = Pattern.compile(secondarySeparator + "|" + mapSeparator);
-    nullBuf = nullString.getBytes();
+    nullText = new Text(nullString);
     transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparator, bufferSize);
   }
 
@@ -519,12 +529,13 @@
     writeString(String.valueOf(dub));
   }
 
-  public void internalWriteString(String str) throws TException {
+  Text tmpText = new Text();
+  public void internalWriteString(String str) throws TException  {
     if(str != null) {
-      final byte buf[] = str.getBytes();
-      trans_.write(buf, 0, buf.length);
+      tmpText.set(str);
+      trans_.write(tmpText.getBytes(), 0, tmpText.getLength());
     } else {
-      trans_.write(nullBuf, 0, nullBuf.length);
+      trans_.write(nullText.getBytes(), 0, nullText.getLength());
     }
   }
 
@@ -541,12 +552,7 @@
         firstInnerField = false;
       }
     }
-    if(str != null) {
-      final byte buf[] = str.getBytes();
-      trans_.write(buf, 0, buf.length);
-    } else {
-      trans_.write(nullBuf, 0, nullBuf.length);
-    }
+    internalWriteString(str);
   }
 
   public void writeBinary(byte[] bin) throws TException {