You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by br...@apache.org on 2015/03/13 19:19:00 UTC

svn commit: r1666535 - in /commons/proper/lang/trunk/src: changes/ main/java/org/apache/commons/lang3/ main/java/org/apache/commons/lang3/text/translate/ test/java/org/apache/commons/lang3/

Author: britter
Date: Fri Mar 13 18:18:59 2015
New Revision: 1666535

URL: http://svn.apache.org/r1666535
Log:
LANG-877: Performance improvements for StringEscapeUtils. This fixes #49 from GitHub. Thanks to Fabian Lange.

Modified:
    commons/proper/lang/trunk/src/changes/changes.xml
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/CharUtils.java
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java
    commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/CharUtilsTest.java

Modified: commons/proper/lang/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/changes/changes.xml?rev=1666535&r1=1666534&r2=1666535&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/changes/changes.xml [utf-8] (original)
+++ commons/proper/lang/trunk/src/changes/changes.xml [utf-8] Fri Mar 13 18:18:59 2015
@@ -22,6 +22,7 @@
   <body>
 
   <release version="3.4" date="tba" description="tba">
+    <action issue="LANG-877" type="add" dev="britter" due-to="Fabian Lange">Performance improvements for StringEscapeUtils</action>
     <action issue="LANG-1093" type="add" dev="britter" due-to="Fabian Lange">Add ClassUtils.getAbbreviatedName()</action>
     <action issue="LANG-1090" type="fix" dev="sebb">FastDateParser does not set error indication in ParsePosition</action>
     <action issue="LANG-1089" type="fix" dev="sebb">FastDateParser does not handle excess hours as per SimpleDateFormat</action>

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/CharUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/CharUtils.java?rev=1666535&r1=1666534&r2=1666535&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/CharUtils.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/CharUtils.java Fri Mar 13 18:18:59 2015
@@ -31,6 +31,8 @@ public class CharUtils {
     
     private static final String[] CHAR_STRING_ARRAY = new String[128];
     
+    private static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
+
     /**
      * {@code \u000a} linefeed LF ('\n').
      * 
@@ -350,14 +352,13 @@ public class CharUtils {
      * @return the escaped Unicode string
      */
     public static String unicodeEscaped(final char ch) {
-        if (ch < 0x10) {
-            return "\\u000" + Integer.toHexString(ch);
-        } else if (ch < 0x100) {
-            return "\\u00" + Integer.toHexString(ch);
-        } else if (ch < 0x1000) {
-            return "\\u0" + Integer.toHexString(ch);
-        }
-        return "\\u" + Integer.toHexString(ch);
+        StringBuilder sb = new StringBuilder(6);
+        sb.append("\\u");
+        sb.append(HEX_DIGITS[(ch >> 12) & 15]);
+        sb.append(HEX_DIGITS[(ch >> 8) & 15]);
+        sb.append(HEX_DIGITS[(ch >> 4) & 15]);
+        sb.append(HEX_DIGITS[(ch) & 15]);
+        return sb.toString();
     }
     
     /**

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java?rev=1666535&r1=1666534&r2=1666535&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java Fri Mar 13 18:18:59 2015
@@ -31,6 +31,8 @@ import java.util.Locale;
  */
 public abstract class CharSequenceTranslator {
 
+    static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
+
     /**
      * Translate a set of codepoints, represented by an int index into a CharSequence, 
      * into another set of codepoints. The number of codepoints consumed must be returned, 
@@ -84,9 +86,18 @@ public abstract class CharSequenceTransl
         while (pos < len) {
             final int consumed = translate(input, pos, out);
             if (consumed == 0) {
-                final char[] c = Character.toChars(Character.codePointAt(input, pos));
-                out.write(c);
-                pos+= c.length;
+                // inlined implementation of Character.toChars(Character.codePointAt(input, pos))
+                // avoids allocating temp char arrays and duplicate checks
+                char c1 = input.charAt(pos);
+                out.write(c1);
+                pos++;
+                if (Character.isHighSurrogate(c1) && pos < len) {
+                    char c2 = input.charAt(pos);
+                    if (Character.isLowSurrogate(c2)) {
+                      out.write(c2);
+                      pos++;
+                    }
+                }
                 continue;
             }
             // contract with translators is that they have to understand codepoints

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java?rev=1666535&r1=1666534&r2=1666535&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java Fri Mar 13 18:18:59 2015
@@ -114,14 +114,12 @@ public class UnicodeEscaper extends Code
         // TODO: Handle potential + sign per various Unicode escape implementations
         if (codepoint > 0xffff) {
             out.write(toUtf16Escape(codepoint));
-        } else if (codepoint > 0xfff) {
-            out.write("\\u" + hex(codepoint));
-        } else if (codepoint > 0xff) {
-            out.write("\\u0" + hex(codepoint));
-        } else if (codepoint > 0xf) {
-            out.write("\\u00" + hex(codepoint));
         } else {
-            out.write("\\u000" + hex(codepoint));
+          out.write("\\u");
+          out.write(HEX_DIGITS[(codepoint >> 12) & 15]);
+          out.write(HEX_DIGITS[(codepoint >> 8) & 15]);
+          out.write(HEX_DIGITS[(codepoint >> 4) & 15]);
+          out.write(HEX_DIGITS[(codepoint) & 15]);
         }
         return true;
     }

Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/CharUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/CharUtilsTest.java?rev=1666535&r1=1666534&r2=1666535&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/CharUtilsTest.java (original)
+++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/CharUtilsTest.java Fri Mar 13 18:18:59 2015
@@ -194,6 +194,7 @@ public class CharUtilsTest {
     @Test
     public void testToUnicodeEscaped_char() {
         assertEquals("\\u0041", CharUtils.unicodeEscaped('A'));
+        assertEquals("\\u004c", CharUtils.unicodeEscaped('L'));
        
         for (int i = 0; i < 196; i++) {
             final String str = CharUtils.unicodeEscaped((char) i);