You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by ba...@apache.org on 2006/06/27 02:28:44 UTC

svn commit: r417319 - in /jakarta/commons/proper/lang/trunk/src: java/org/apache/commons/lang/RandomStringUtils.java test/org/apache/commons/lang/RandomStringUtilsTest.java

Author: bayard
Date: Mon Jun 26 17:28:43 2006
New Revision: 417319

URL: http://svn.apache.org/viewvc?rev=417319&view=rev
Log:
Adding a test and a fix for LANG-100. This is a bug in which the randomly created String can sometimes be illegal Unicode, because the code does not take into account that some characters are only valid in combination with others (surrogate pairs). High and low surrogates are now dealt with, but I'm skipping private high surrogates because I can't find out what to do with them. Need to go plod very slowly through the spec. This site was very useful: http://www.alanwood.net/unicode/private_use_high_surrogates.html

Modified:
    jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
    jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java

Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java?rev=417319&r1=417318&r2=417319&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java (original)
+++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java Mon Jun 26 17:28:43 2006
@@ -18,6 +18,14 @@
 import java.util.Random;
 /**
  * <p>Operations for random <code>String</code>s.</p>
+ * <p>Currently <em>private high surrogate</em> characters are skipped,
+ * as we don't know how to handle them. These are the Unicode characters
+ * that fall between the values 56192 (db80) and 56319 (dbff) inclusive.
+ * High and low surrogates are correctly dealt with - that is, if a
+ * high surrogate is randomly chosen, 55296 (d800) to 56191 (db7f),
+ * then it is followed by a low surrogate. If a low surrogate is chosen,
+ * 56320 (dc00) to 57343 (dfff), then it is placed after a randomly
+ * chosen high surrogate.</p>
  *
  * @author GenerationJava Core library
  * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
@@ -243,8 +251,32 @@
             }
             if ((letters && Character.isLetter(ch))
                 || (numbers && Character.isDigit(ch))
-                || (!letters && !numbers)) {
-                buffer[count] = ch;
+                || (!letters && !numbers)) 
+            {
+                if(ch >= 56320 && ch <= 57343) {
+                    if(count == 0) {
+                        count++;
+                    } else {
+                        // low surrogate, insert high surrogate after putting it in
+                        buffer[count] = ch;
+                        count--;
+                        buffer[count] = (char) (55296 + random.nextInt(128));
+                    }
+                } else if(ch >= 55296 && ch <= 56191) {
+                    if(count == 0) {
+                        count++;
+                    } else {
+                        // high surrogate, insert low surrogate before putting it in
+                        buffer[count] = (char) (56320 + random.nextInt(128));
+                        count--;
+                        buffer[count] = ch;
+                    }
+                } else if(ch >= 56192 && ch <= 56319) {
+                    // private high surrogate, no effing clue, so skip it
+                    count++;
+                } else {
+                    buffer[count] = ch;
+                }
             } else {
                 count++;
             }

Modified: jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java?rev=417319&r1=417318&r2=417319&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java (original)
+++ jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java Mon Jun 26 17:28:43 2006
@@ -315,7 +315,33 @@
         }
         return sumSq;
     }           
-        
+
+    /**
+     * Regression test for LANG-100: checks that the string returned by
+     * {@link RandomStringUtils#random(int)} is unchanged by a UTF-8 encode/decode round trip.
+     *
+     * @author stefanhoehne@fastmail.fm
+     * @throws Exception if encoding or decoding with the named charset fails
+     */
+    public void testLang100() throws Exception {
+        int size = 5000;
+        String encoding = "UTF-8";
+        String orig = RandomStringUtils.random(size);
+        byte[] bytes = orig.getBytes(encoding);
+        String copy = new String(bytes, encoding);
+
+        // compare char by char first, so a failure reports the index and both char values in hex
+        for (int i=0; i < orig.length() && i < copy.length(); i++) {
+            char o = orig.charAt(i);
+            char c = copy.charAt(i);
+            assertEquals("differs at " + i + "(" + Integer.toHexString((new Character(o)).hashCode()) + "," +
+            Integer.toHexString((new Character(c)).hashCode()) + ")", o, c);
+        }
+        // the loop above stops at the shorter string, so check the lengths separately
+        assertEquals(orig.length(), copy.length());
+        // final whole-string comparison, for completeness
+        assertEquals(orig, copy);
+    }
 
     public static void main(String args[]) {
         TestRunner.run(suite());



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org