You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/07/31 23:04:47 UTC

svn commit: r427269 - in /lucene/hadoop/trunk/src: java/org/apache/hadoop/io/Text.java test/org/apache/hadoop/io/TestText.java

Author: cutting
Date: Mon Jul 31 14:04:47 2006
New Revision: 427269

URL: http://svn.apache.org/viewvc?rev=427269&view=rev
Log:
HADOOP-393.  Fix a bug in Text validation.

Modified:
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java?rev=427269&r1=427268&r2=427269&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Text.java Mon Jul 31 14:04:47 2006
@@ -415,13 +415,17 @@
    * @param utf8: byte array
    * @exception MalformedInputException if the byte array contains invalid utf-8
    */
-  public static void validateUTF8(byte[] utf8) 
+  public static void validateUTF8(byte[] utf8) throws MalformedInputException {
+     validateUTF(utf8, 0, utf8.length);     
+  }
+  
+  public static void validateUTF(byte[] utf8, int start, int len)
     throws MalformedInputException {
-    int count = 0;
+    int count = start;
     int leadByte = 0;
     int length = 0;
     int state = LEAD_BYTE;
-    while (count < utf8.length) {
+    while (count < start+len) {
       int aByte = ((int) utf8[count] & 0xFF);
 
       switch (state) {
@@ -433,7 +437,6 @@
         case 0: // check for ASCII
           if (leadByte > 0x7E)
             throw new MalformedInputException(count);
-          state = TRAIL_BYTE;
           break;
         case 1:
           if (leadByte < 0xC2 || leadByte > 0xDF)

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java?rev=427269&r1=427268&r2=427269&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestText.java Mon Jul 31 14:04:47 2006
@@ -18,9 +18,7 @@
 
 import junit.framework.TestCase;
 
-import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
 import java.util.Random;
 
 import org.apache.commons.logging.Log;
@@ -72,25 +70,24 @@
 
   public void testWritable() throws Exception {
     for (int i = 0; i < NUM_ITERATIONS; i++) {
-      try {
         String str;
         if(i == 0 )
             str = getLongString();
         else
             str = getTestString();
         TestWritable.testWritable(new Text(str));
-      } catch (IOException e) {
-          LOG.info(e);
-      }
     }
   }
 
 
   public void testCoding() throws Exception {
-    for (int i = 0; i < NUM_ITERATIONS; i++) {
-      try {
+      String before = "Bad \t encoding \t testcase";
+      Text text = new Text(before);
+      String after = text.toString();
+      assertTrue(before.equals(after));
+
+      for (int i = 0; i < NUM_ITERATIONS; i++) {
           // generate a random string
-          String before;
           if(i == 0 )
               before = getLongString();
           else
@@ -106,12 +103,9 @@
                       utf8Java, 0, utf8Java.length));
               
           // test utf8 to string
-          String after = Text.decode(utf8Java);
+          after = Text.decode(utf8Java);
           assertTrue(before.equals(after));
-      }catch(CharacterCodingException e) {
-          LOG.info( e );
       }
-    }
   }
   
   
@@ -120,31 +114,27 @@
     DataInputBuffer in = new DataInputBuffer();
 
     for (int i = 0; i < NUM_ITERATIONS; i++) {
-        try {
-          // generate a random string
-          String before;          
-          if(i == 0 )
-              before = getLongString();
-          else
-              before = getTestString();
-
-          // write it
-          out.reset();
-          Text.writeString(out, before);
-
-          // test that it reads correctly
-          in.reset(out.getData(), out.getLength());
-          String after = Text.readString(in);
-          assertTrue(before.equals(after));
-    
-          // Test compatibility with Java's other decoder 
-          int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
-          String after2 = new String(out.getData(), strLenSize, 
-          out.getLength()-strLenSize, "UTF-8");
-              assertTrue(before.equals(after2));
-        }catch(IOException e) {
-            LOG.info(e);
-        }
+        // generate a random string
+        String before;          
+        if(i == 0 )
+            before = getLongString();
+        else
+            before = getTestString();
+        
+        // write it
+        out.reset();
+        Text.writeString(out, before);
+        
+        // test that it reads correctly
+        in.reset(out.getData(), out.getLength());
+        String after = Text.readString(in);
+        assertTrue(before.equals(after));
+        
+        // Test compatibility with Java's other decoder 
+        int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
+        String after2 = new String(out.getData(), strLenSize, 
+                out.getLength()-strLenSize, "UTF-8");
+        assertTrue(before.equals(after2));
       }
   }
 
@@ -154,7 +144,6 @@
       DataOutputBuffer out3 = new DataOutputBuffer();
       Text.Comparator comparator = new Text.Comparator();
       for (int i=0; i<NUM_ITERATIONS; i++ ) {
-        try {
           // reset output buffer
           out1.reset();
           out2.reset();
@@ -193,24 +182,23 @@
           assertEquals(txt1.compareTo(txt3), 0);
           assertEquals(comparator.compare(out1.getData(), 0, out3.getLength(),
                   out3.getData(), 0, out3.getLength()), 0);
-        } catch (IOException e) {
-            LOG.info(e);
-        }
       }
   }
       
   public void testFind() throws Exception {
-      try {
-          Text text = new Text("abcd\u20acbdcd\u20ac");
-          assertTrue(text.find("abd")==-1);
-          assertTrue(text.find("ac")==-1);
-          assertTrue(text.find("\u20ac")==4);
-          assertTrue(text.find("\u20ac", 5)==11);
-      } catch( CharacterCodingException e) {
-          LOG.warn(e);
-      }
+      Text text = new Text("abcd\u20acbdcd\u20ac");
+      assertTrue(text.find("abd")==-1);
+      assertTrue(text.find("ac")==-1);
+      assertTrue(text.find("\u20ac")==4);
+      assertTrue(text.find("\u20ac", 5)==11);
+  }
+
+  public void testValidate() throws Exception {
+      Text text = new Text("abcd\u20acbdcd\u20ac");
+      byte [] utf8 = text.getBytes();
+      int length = text.getLength();
+      Text.validateUTF(utf8, 0, length);
   }
-  
   public static void main(String[] args)  throws Exception
   {
     TestText test = new TestText("main");
@@ -219,5 +207,6 @@
     test.testCoding();
     test.testWritable();
     test.testFind();
+    test.testValidate();
   }
 }