You are viewing a plain text version of this content. (The link to the canonical version was not preserved in this plain-text copy.)
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2010/02/10 22:58:24 UTC
svn commit: r908661 - in /hadoop/common/trunk: CHANGES.txt
src/java/org/apache/hadoop/io/UTF8.java
src/test/core/org/apache/hadoop/io/TestUTF8.java
Author: cutting
Date: Wed Feb 10 21:58:11 2010
New Revision: 908661
URL: http://svn.apache.org/viewvc?rev=908661&view=rev
Log:
HADOOP-6522. Fix decoding of codepoint zero in UTF8.
Modified:
hadoop/common/trunk/CHANGES.txt
hadoop/common/trunk/src/java/org/apache/hadoop/io/UTF8.java
hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestUTF8.java
Modified: hadoop/common/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=908661&r1=908660&r2=908661&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Wed Feb 10 21:58:11 2010
@@ -184,6 +184,8 @@
HADOOP-6540. Contrib unit tests have invalid XML for core-site, etc.
(Aaron Kimball via tomwhite)
+ HADOOP-6522. Fix decoding of codepoint zero in UTF8. (cutting)
+
Release 0.21.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/UTF8.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/UTF8.java?rev=908661&r1=908660&r2=908661&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/io/UTF8.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/io/UTF8.java Wed Feb 10 21:58:11 2010
@@ -253,7 +253,7 @@
int utf8Length = 0;
for (int i = 0; i < stringLength; i++) {
int c = string.charAt(i);
- if ((c >= 0x0001) && (c <= 0x007F)) {
+ if (c <= 0x007F) {
utf8Length++;
} else if (c > 0x07FF) {
utf8Length += 3;
@@ -270,7 +270,7 @@
final int end = start + length;
for (int i = start; i < end; i++) {
int code = s.charAt(i);
- if (code >= 0x01 && code <= 0x7F) {
+ if (code <= 0x7F) {
out.writeByte((byte)code);
} else if (code <= 0x07FF) {
out.writeByte((byte)(0xC0 | ((code >> 6) & 0x1F)));
Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestUTF8.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestUTF8.java?rev=908661&r1=908660&r2=908661&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestUTF8.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestUTF8.java Wed Feb 10 21:58:11 2010
@@ -22,6 +22,7 @@
import java.util.Random;
/** Unit tests for UTF8. */
+@SuppressWarnings("deprecation")
public class TestUTF8 extends TestCase {
public TestUTF8(String name) { super(name); }
@@ -37,13 +38,13 @@
}
public void testWritable() throws Exception {
- for (int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10000; i++) {
TestWritable.testWritable(new UTF8(getTestString()));
}
}
public void testGetBytes() throws Exception {
- for (int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10000; i++) {
// generate a random string
String before = getTestString();
@@ -57,7 +58,7 @@
DataOutputBuffer out = new DataOutputBuffer();
DataInputBuffer in = new DataInputBuffer();
- for (int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10000; i++) {
// generate a random string
String before = getTestString();
@@ -82,5 +83,14 @@
}
}
+
+ public void testNullEncoding() throws Exception {
+ String s = new String(new char[] { 0 });
+
+ DataOutputBuffer dob = new DataOutputBuffer();
+ new UTF8(s).write(dob);
+
+ assertEquals(s, new String(dob.getData(), 2, dob.getLength()-2, "UTF-8"));
+ }
}