You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/02/06 00:29:01 UTC
svn commit: r741339 - in /hadoop/hive/trunk: ./ data/files/
ql/src/test/org/apache/hadoop/hive/ql/ ql/src/test/queries/clientpositive/
ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/thrift/
Author: zshao
Date: Thu Feb 5 23:29:00 2009
New Revision: 741339
URL: http://svn.apache.org/viewvc?rev=741339&view=rev
Log:
HIVE-264. TBinarySortable Protocol supports null characters. (zshao)
Added:
hadoop/hive/trunk/data/files/string.txt (with props)
hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/build-common.xml
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Feb 5 23:29:00 2009
@@ -120,6 +120,8 @@
BUG FIXES
+ HIVE-264. TBinarySortable Protocol supports null characters. (zshao)
+
HIVE-255. Propagate user name to Hive metastore. (Prasad Chakka via zshao)
HIVE-263. TCTLSeparatedProtocol should use UTF-8 to encode/decode
Modified: hadoop/hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/build-common.xml (original)
+++ hadoop/hive/trunk/build-common.xml Thu Feb 5 23:29:00 2009
@@ -59,7 +59,7 @@
<property name="test.include" value="Test*"/>
<property name="test.classpath.id" value="test.classpath"/>
<property name="test.output" value="true"/>
- <property name="test.timeout" value="900000"/>
+ <property name="test.timeout" value="1800000"/>
<property name="test.junit.output.format" value="xml"/>
<property name="test.junit.output.usefile" value="true"/>
Added: hadoop/hive/trunk/data/files/string.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/string.txt?rev=741339&view=auto
==============================================================================
Binary file - no diff available.
Propchange: hadoop/hive/trunk/data/files/string.txt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Thu Feb 5 23:29:00 2009
@@ -642,12 +642,13 @@
public int checkCliDriverResults(String tname) throws Exception {
String [] cmdArray;
- cmdArray = new String[5];
+ cmdArray = new String[6];
cmdArray[0] = "diff";
- cmdArray[1] = "-I";
- cmdArray[2] = "\\(file:\\)\\|\\(/tmp/.*\\)";
- cmdArray[3] = (new File(logDir, tname + ".out")).getPath();
- cmdArray[4] = (new File(outDir, tname + ".out")).getPath();
+ cmdArray[1] = "-a";
+ cmdArray[2] = "-I";
+ cmdArray[3] = "\\(file:\\)\\|\\(/tmp/.*\\)";
+ cmdArray[4] = (new File(logDir, tname + ".out")).getPath();
+ cmdArray[5] = (new File(outDir, tname + ".out")).getPath();
System.out.println(cmdArray[0] + " " + cmdArray[1] + " " + cmdArray[2] + " " +
cmdArray[3] + " " + cmdArray[4]);
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q?rev=741339&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/binarysortable_1.q Thu Feb 5 23:29:00 2009
@@ -0,0 +1,24 @@
+CREATE TABLE mytable(key STRING, value STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '9'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/string.txt' INTO TABLE mytable;
+
+EXPLAIN
+SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(key, '\001', '^A'), '\0', '^@'), '\002', '^B'), value
+FROM (
+ SELECT key, sum(value) as value
+ FROM mytable
+ GROUP BY key
+) a;
+
+SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(key, '\001', '^A'), '\0', '^@'), '\002', '^B'), value
+FROM (
+ SELECT key, sum(value) as value
+ FROM mytable
+ GROUP BY key
+) a;
+
+
+DROP TABLE mytable;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out?rev=741339&view=auto
==============================================================================
Files hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out (added) and hadoop/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out Thu Feb 5 23:29:00 2009 differ
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java?rev=741339&r1=741338&r2=741339&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TBinarySortableProtocol.java Thu Feb 5 23:29:00 2009
@@ -45,7 +45,7 @@
* Int: flip the sign-bit to make sure negative comes before positive
* Long: flip the sign-bit to make sure negative comes before positive
* Double: flip the sign-bit for positive double, and all bits for negative double values
- * String: NULL-terminated string
+ * String: NULL-terminated UTF-8 string, with NULL escaped to \1 \1, and \1 escaped to \1 \2
* NON-NULL Complex Types:
* Struct: first the single byte \1, and then one field by one field.
* List: size stored as Int (see above), then one element by one element.
@@ -283,6 +283,16 @@
final protected byte[] nullByte = new byte[] {0};
final protected byte[] nonNullByte = new byte[] {1};
+ /**
+ * The escaped byte sequence for the null byte.
+ * This cannot be changed alone without changing the readString() code.
+ */
+ final protected byte[] escapedNull = new byte[] {1,1};
+ /**
+ * The escaped byte sequence for the "\1" byte.
+ * This cannot be changed alone without changing the readString() code.
+ */
+ final protected byte[] escapedOne = new byte[] {1,2};
public void writeString(String str) throws TException {
byte[] dat;
try {
@@ -290,13 +300,31 @@
} catch (UnsupportedEncodingException uex) {
throw new TException("JVM DOES NOT SUPPORT UTF-8: " + uex.getMessage());
}
- for(int i=0; i<str.length(); i++) {
- if (str.charAt(i) == '\0') {
- throw new TException( getClass().getName() + " does not support serializing strings with null bytes!");
+ writeRawBytes(nonNullByte, 0, 1);
+ int begin = 0;
+ int i = 0;
+ for (; i < dat.length; i++) {
+ if (dat[i] == 0 || dat[i] == 1) {
+ // Write the first part of the array
+ if (i > begin) {
+ writeRawBytes(dat, begin, i-begin);
+ }
+ // Write the escaped byte.
+ if (dat[i] == 0) {
+ writeRawBytes(escapedNull, 0, escapedNull.length);
+ } else {
+ writeRawBytes(escapedOne, 0, escapedOne.length);
+ }
+ // Move the pointer to the next byte, since we have written
+ // out the escaped byte in the block above already.
+ begin = i+1;
}
}
- writeRawBytes(nonNullByte, 0, 1);
- writeRawBytes(dat, 0, dat.length);
+ // Write the remaining part of the array
+ if (i > begin) {
+ writeRawBytes(dat, begin, i-begin);
+ }
+ // Write the terminating NULL byte
writeRawBytes(nullByte, 0, 1);
}
@@ -507,14 +535,20 @@
while (true) {
readRawAll(bin, 0, 1);
if (bin[0] == 0) {
+ // End of string.
break;
- } else {
- if (i == stringBytes.length) {
- stringBytes = Arrays.copyOf(stringBytes, stringBytes.length*2);
- }
- stringBytes[i] = bin[0];
- i++;
}
+ if (bin[0] == 1) {
+ // Escaped byte, unescape it.
+ readRawAll(bin, 0, 1);
+ assert(bin[0] == 1 || bin[0] == 2);
+ bin[0] = (byte)(bin[0] - 1);
+ }
+ if (i == stringBytes.length) {
+ stringBytes = Arrays.copyOf(stringBytes, stringBytes.length*2);
+ }
+ stringBytes[i] = bin[0];
+ i++;
}
try {
String r = new String(stringBytes, 0, i, "UTF-8");