You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by ba...@apache.org on 2006/06/27 02:28:44 UTC
svn commit: r417319 - in /jakarta/commons/proper/lang/trunk/src:
java/org/apache/commons/lang/RandomStringUtils.java
test/org/apache/commons/lang/RandomStringUtilsTest.java
Author: bayard
Date: Mon Jun 26 17:28:43 2006
New Revision: 417319
URL: http://svn.apache.org/viewvc?rev=417319&view=rev
Log:
Adding a test and a fix for LANG-100. This is a bug in which the randomly created String can sometimes be illegal Unicode, because the code does not take into account the relationships that exist between characters. High and low surrogates are now dealt with, but I'm skipping private high surrogates because I can't find out what to do with them. Need to go plod very slowly through the spec. This site was very useful: http://www.alanwood.net/unicode/private_use_high_surrogates.html
Modified:
jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java?rev=417319&r1=417318&r2=417319&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java (original)
+++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java Mon Jun 26 17:28:43 2006
@@ -18,6 +18,14 @@
import java.util.Random;
/**
* <p>Operations for random <code>String</code>s.</p>
+ * <p>Currently <em>private high surrogate</em> characters are ignored,
+ * as we don't know how to handle them. These are Unicode characters that
+ * fall between the values 56192 (db80) and 56319 (dbff).
+ * High and low surrogates are correctly dealt with - that is if a
+ * high surrogate is randomly chosen, 55296 (d800) to 56191 (db7f)
+ * then it is followed by a low surrogate. If a low surrogate is chosen,
+ * 56320 (dc00) to 57343 (dfff) then it is placed after a randomly
+ * chosen high surrogate. </p>
*
* @author GenerationJava Core library
* @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
@@ -243,8 +251,32 @@
}
if ((letters && Character.isLetter(ch))
|| (numbers && Character.isDigit(ch))
- || (!letters && !numbers)) {
- buffer[count] = ch;
+ || (!letters && !numbers))
+ {
+ if(ch >= 56320 && ch <= 57343) {
+ if(count == 0) {
+ count++;
+ } else {
+ // low surrogate, insert high surrogate after putting it in
+ buffer[count] = ch;
+ count--;
+ buffer[count] = (char) (55296 + random.nextInt(128));
+ }
+ } else if(ch >= 55296 && ch <= 56191) {
+ if(count == 0) {
+ count++;
+ } else {
+ // high surrogate, insert low surrogate before putting it in
+ buffer[count] = (char) (56320 + random.nextInt(128));
+ count--;
+ buffer[count] = ch;
+ }
+ } else if(ch >= 56192 && ch <= 56319) {
+ // private high surrogate, no effing clue, so skip it
+ count++;
+ } else {
+ buffer[count] = ch;
+ }
} else {
count++;
}
Modified: jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java?rev=417319&r1=417318&r2=417319&view=diff
==============================================================================
--- jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java (original)
+++ jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java Mon Jun 26 17:28:43 2006
@@ -315,7 +315,33 @@
}
return sumSq;
}
-
+
+ /**
+ * Checks that the string returned by {@link RandomStringUtils#random(int)}
+ * can be converted to UTF-8 bytes and back to a String without loss.
+ *
+ * @author stefanhoehne@fastmail.fm
+ * @throws Exception if encoding or decoding the string fails
+ */
+ public void testLang100() throws Exception {
+ int size = 5000;
+ String encoding = "UTF-8";
+ String orig = RandomStringUtils.random(size);
+ byte[] bytes = orig.getBytes(encoding);
+ String copy = new String(bytes, encoding);
+
+ // compare char by char first, so a failure reports the index and both values in hex
+ for (int i=0; i < orig.length() && i < copy.length(); i++) {
+ char o = orig.charAt(i);
+ char c = copy.charAt(i);
+ assertEquals("differs at " + i + "(" + Integer.toHexString((new Character(o)).hashCode()) + "," +
+ Integer.toHexString((new Character(c)).hashCode()) + ")", o, c);
+ }
+ // the loop stops at the end of the shorter string, so compare the lengths too
+ assertEquals(orig.length(), copy.length());
+ // finally compare the whole strings, just to be complete
+ assertEquals(orig, copy);
+ }
public static void main(String args[]) {
TestRunner.run(suite());
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org