You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/02/06 00:29:01 UTC

svn commit: r741339 - in /hadoop/hive/trunk: ./ data/files/ ql/src/test/org/apache/hadoop/hive/ql/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/org/apache/hadoop/hive/serde2/thrift/

Author: zshao
Date: Thu Feb  5 23:29:00 2009
New Revision: 741339

URL: http://svn.apache.org/viewvc?rev=741339&view=rev
Log:
HIVE-264. TBinarySortable Protocol supports null characters. (zshao)

Added:
    hadoop/hive/trunk/data/files/string.txt   (with props)
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/build-common.xml
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Feb  5 23:29:00 2009
@@ -120,6 +120,8 @@
 
   BUG FIXES
 
+    HIVE-264. TBinarySortable Protocol supports null characters. (zshao)
+
     HIVE-255. Propagate user name to Hive metastore. (Prasad Chakka via zshao)
 
     HIVE-263. TCTLSeparatedProtocol should use UTF-8 to encode/decode

Modified: hadoop/hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/build-common.xml (original)
+++ hadoop/hive/trunk/build-common.xml Thu Feb  5 23:29:00 2009
@@ -59,7 +59,7 @@
   <property name="test.include" value="Test*"/>
   <property name="test.classpath.id" value="test.classpath"/>
   <property name="test.output" value="true"/>
-  <property name="test.timeout" value="900000"/>
+  <property name="test.timeout" value="1800000"/>
   <property name="test.junit.output.format" value="xml"/>
   <property name="test.junit.output.usefile" value="true"/>
 

Added: hadoop/hive/trunk/data/files/string.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/string.txt?rev=741339&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hadoop/hive/trunk/data/files/string.txt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Thu Feb  5 23:29:00 2009
@@ -642,12 +642,13 @@
   public int checkCliDriverResults(String tname) throws Exception {
     String [] cmdArray;
 
-    cmdArray = new String[5];
+    cmdArray = new String[6];
     cmdArray[0] = "diff";
-    cmdArray[1] = "-I";
-    cmdArray[2] = "\\(file:\\)\\|\\(/tmp/.*\\)";
-    cmdArray[3] = (new File(logDir, tname + ".out")).getPath();
-    cmdArray[4] = (new File(outDir, tname + ".out")).getPath();
+    cmdArray[1] = "-a";
+    cmdArray[2] = "-I";
+    cmdArray[3] = "\\(file:\\)\\|\\(/tmp/.*\\)";
+    cmdArray[4] = (new File(logDir, tname + ".out")).getPath();
+    cmdArray[5] = (new File(outDir, tname + ".out")).getPath();
     System.out.println(cmdArray[0] + " " + cmdArray[1] + " " + cmdArray[2] + " " +
                        cmdArray[3] + " " + cmdArray[4]);
 

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q?rev=741339&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q Thu Feb  5 23:29:00 2009
@@ -0,0 +1,24 @@
+CREATE TABLE mytable(key STRING, value STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '9'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/string.txt' INTO TABLE mytable;
+
+EXPLAIN
+SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(key, '\001', '^A'), '\0', '^@'), '\002', '^B'), value
+FROM (
+        SELECT key, sum(value) as value
+        FROM mytable
+        GROUP BY key
+) a;
+
+SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(key, '\001', '^A'), '\0', '^@'), '\002', '^B'), value
+FROM (
+        SELECT key, sum(value) as value
+        FROM mytable
+        GROUP BY key
+) a;
+
+
+DROP TABLE mytable;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out?rev=741339&view=auto
==============================================================================
Files hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out (added) and hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out Thu Feb  5 23:29:00 2009 differ

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java Thu Feb  5 23:29:00 2009
@@ -45,7 +45,7 @@
  *   Int:     flip the sign-bit to make sure negative comes before positive
  *   Long:    flip the sign-bit to make sure negative comes before positive
  *   Double:  flip the sign-bit for positive double, and all bits for negative double values
- *   String:  NULL-terminated string
+ *   String:  NULL-terminated UTF-8 string, with NULL escaped to \1 \1, and \1 escaped to \1 \2
  * NON-NULL Complex Types:
  *   Struct:  first the single byte \1, and then one field by one field.
  *   List:    size stored as Int (see above), then one element by one element. 
@@ -283,6 +283,16 @@
 
   final protected byte[] nullByte = new byte[] {0};
   final protected byte[] nonNullByte = new byte[] {1};
+  /**
+   * The escaped byte sequence for the null byte.
+   * If this sequence is changed, the readString() code must be changed to match.
+   */
+  final protected byte[] escapedNull = new byte[] {1,1};
+  /**
+   * The escaped byte sequence for the "\1" byte.
+   * If this sequence is changed, the readString() code must be changed to match.
+   */
+  final protected byte[] escapedOne = new byte[] {1,2};
   public void writeString(String str) throws TException {
     byte[] dat;
     try {
@@ -290,13 +300,31 @@
     } catch (UnsupportedEncodingException uex) {
       throw new TException("JVM DOES NOT SUPPORT UTF-8: " + uex.getMessage());
     }
-    for(int i=0; i<str.length(); i++) {
-      if (str.charAt(i) == '\0') {
-        throw new TException( getClass().getName() + " does not support serializing strings with null bytes!");
+    writeRawBytes(nonNullByte, 0, 1);
+    int begin = 0;
+    int i = 0;
+    for (; i < dat.length; i++) {
+      if (dat[i] == 0 || dat[i] == 1) {
+        // Write the first part of the array
+        if (i > begin) {
+          writeRawBytes(dat, begin, i-begin);
+        }
+        // Write the escaped byte.
+        if (dat[i] == 0) {
+          writeRawBytes(escapedNull, 0, escapedNull.length);
+        } else {
+          writeRawBytes(escapedOne, 0, escapedOne.length);
+        }
+        // Move the pointer to the next byte, since we have written
+        // out the escaped byte in the block above already.
+        begin = i+1;
       }
     }
-    writeRawBytes(nonNullByte, 0, 1);
-    writeRawBytes(dat, 0, dat.length);
+    // Write the remaining part of the array
+    if (i > begin) {
+      writeRawBytes(dat, begin, i-begin);
+    }
+    // Write the terminating NULL byte
     writeRawBytes(nullByte, 0, 1);
   }
 
@@ -507,14 +535,20 @@
     while (true) {
       readRawAll(bin, 0, 1);
       if (bin[0] == 0) {
+        // End of string.
         break;
-      } else {
-        if (i == stringBytes.length) {
-          stringBytes = Arrays.copyOf(stringBytes, stringBytes.length*2);
-        }
-        stringBytes[i] = bin[0];
-        i++;
       }
+      if (bin[0] == 1) {
+        // Escaped byte, unescape it.
+        readRawAll(bin, 0, 1);
+        assert(bin[0] == 1 || bin[0] == 2);
+        bin[0] = (byte)(bin[0] - 1);
+      }
+      if (i == stringBytes.length) {
+        stringBytes = Arrays.copyOf(stringBytes, stringBytes.length*2);
+      }
+      stringBytes[i] = bin[0];
+      i++;
     }
     try {
       String r = new String(stringBytes, 0, i, "UTF-8");