You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/07/31 23:04:47 UTC
svn commit: r427269 - in /lucene/hadoop/trunk/src: java/org/apache/hadoop/io/Text.java test/org/apache/hadoop/io/TestText.java
Author: cutting
Date: Mon Jul 31 14:04:47 2006
New Revision: 427269
URL: http://svn.apache.org/viewvc?rev=427269&view=rev
Log:
HADOOP-393. Fix a bug in Text validation.
Modified:
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java?rev=427269&r1=427268&r2=427269&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java Mon Jul 31 14:04:47 2006
@@ -415,13 +415,17 @@
* @param utf8: byte array
* @exception MalformedInputException if the byte array contains invalid utf-8
*/
- public static void validateUTF8(byte[] utf8)
+ public static void validateUTF8(byte[] utf8) throws MalformedInputException {
+ validateUTF(utf8, 0, utf8.length);
+ }
+
+ public static void validateUTF(byte[] utf8, int start, int len)
throws MalformedInputException {
- int count = 0;
+ int count = start;
int leadByte = 0;
int length = 0;
int state = LEAD_BYTE;
- while (count < utf8.length) {
+ while (count < start+len) {
int aByte = ((int) utf8[count] & 0xFF);
switch (state) {
@@ -433,7 +437,6 @@
case 0: // check for ASCII
if (leadByte > 0x7E)
throw new MalformedInputException(count);
- state = TRAIL_BYTE;
break;
case 1:
if (leadByte < 0xC2 || leadByte > 0xDF)
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java?rev=427269&r1=427268&r2=427269&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java Mon Jul 31 14:04:47 2006
@@ -18,9 +18,7 @@
import junit.framework.TestCase;
-import java.io.IOException;
import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
import java.util.Random;
import org.apache.commons.logging.Log;
@@ -72,25 +70,24 @@
public void testWritable() throws Exception {
for (int i = 0; i < NUM_ITERATIONS; i++) {
- try {
String str;
if(i == 0 )
str = getLongString();
else
str = getTestString();
TestWritable.testWritable(new Text(str));
- } catch (IOException e) {
- LOG.info(e);
- }
}
}
public void testCoding() throws Exception {
- for (int i = 0; i < NUM_ITERATIONS; i++) {
- try {
+ String before = "Bad \t encoding \t testcase";
+ Text text = new Text(before);
+ String after = text.toString();
+ assertTrue(before.equals(after));
+
+ for (int i = 0; i < NUM_ITERATIONS; i++) {
// generate a random string
- String before;
if(i == 0 )
before = getLongString();
else
@@ -106,12 +103,9 @@
utf8Java, 0, utf8Java.length));
// test utf8 to string
- String after = Text.decode(utf8Java);
+ after = Text.decode(utf8Java);
assertTrue(before.equals(after));
- }catch(CharacterCodingException e) {
- LOG.info( e );
}
- }
}
@@ -120,31 +114,27 @@
DataInputBuffer in = new DataInputBuffer();
for (int i = 0; i < NUM_ITERATIONS; i++) {
- try {
- // generate a random string
- String before;
- if(i == 0 )
- before = getLongString();
- else
- before = getTestString();
-
- // write it
- out.reset();
- Text.writeString(out, before);
-
- // test that it reads correctly
- in.reset(out.getData(), out.getLength());
- String after = Text.readString(in);
- assertTrue(before.equals(after));
-
- // Test compatibility with Java's other decoder
- int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
- String after2 = new String(out.getData(), strLenSize,
- out.getLength()-strLenSize, "UTF-8");
- assertTrue(before.equals(after2));
- }catch(IOException e) {
- LOG.info(e);
- }
+ // generate a random string
+ String before;
+ if(i == 0 )
+ before = getLongString();
+ else
+ before = getTestString();
+
+ // write it
+ out.reset();
+ Text.writeString(out, before);
+
+ // test that it reads correctly
+ in.reset(out.getData(), out.getLength());
+ String after = Text.readString(in);
+ assertTrue(before.equals(after));
+
+ // Test compatibility with Java's other decoder
+ int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
+ String after2 = new String(out.getData(), strLenSize,
+ out.getLength()-strLenSize, "UTF-8");
+ assertTrue(before.equals(after2));
}
}
@@ -154,7 +144,6 @@
DataOutputBuffer out3 = new DataOutputBuffer();
Text.Comparator comparator = new Text.Comparator();
for (int i=0; i<NUM_ITERATIONS; i++ ) {
- try {
// reset output buffer
out1.reset();
out2.reset();
@@ -193,24 +182,23 @@
assertEquals(txt1.compareTo(txt3), 0);
assertEquals(comparator.compare(out1.getData(), 0, out3.getLength(),
out3.getData(), 0, out3.getLength()), 0);
- } catch (IOException e) {
- LOG.info(e);
- }
}
}
public void testFind() throws Exception {
- try {
- Text text = new Text("abcd\u20acbdcd\u20ac");
- assertTrue(text.find("abd")==-1);
- assertTrue(text.find("ac")==-1);
- assertTrue(text.find("\u20ac")==4);
- assertTrue(text.find("\u20ac", 5)==11);
- } catch( CharacterCodingException e) {
- LOG.warn(e);
- }
+ Text text = new Text("abcd\u20acbdcd\u20ac");
+ assertTrue(text.find("abd")==-1);
+ assertTrue(text.find("ac")==-1);
+ assertTrue(text.find("\u20ac")==4);
+ assertTrue(text.find("\u20ac", 5)==11);
+ }
+
+ public void testValidate() throws Exception {
+ Text text = new Text("abcd\u20acbdcd\u20ac");
+ byte [] utf8 = text.getBytes();
+ int length = text.getLength();
+ Text.validateUTF(utf8, 0, length);
}
-
public static void main(String[] args) throws Exception
{
TestText test = new TestText("main");
@@ -219,5 +207,6 @@
test.testCoding();
test.testWritable();
test.testFind();
+ test.testValidate();
}
}