You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by md...@apache.org on 2009/02/23 19:19:41 UTC

svn commit: r747096 - in /jackrabbit/trunk/jackrabbit-jcr-commons/src: main/java/org/apache/jackrabbit/util/Text.java test/java/org/apache/jackrabbit/util/TextTest.java

Author: mduerig
Date: Mon Feb 23 18:19:40 2009
New Revision: 747096

URL: http://svn.apache.org/viewvc?rev=747096&view=rev
Log:
JCR-1976: Text.unescape() should should preserve 'unicode' 

Modified:
    jackrabbit/trunk/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/util/Text.java
    jackrabbit/trunk/jackrabbit-jcr-commons/src/test/java/org/apache/jackrabbit/util/TextTest.java

Modified: jackrabbit/trunk/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/util/Text.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/util/Text.java?rev=747096&r1=747095&r2=747096&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/util/Text.java (original)
+++ jackrabbit/trunk/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/util/Text.java Mon Feb 23 18:19:40 2009
@@ -16,8 +16,8 @@
  */
 package org.apache.jackrabbit.util;
 
-import java.io.UnsupportedEncodingException;
 import java.io.ByteArrayOutputStream;
+import java.io.UnsupportedEncodingException;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
@@ -393,34 +393,35 @@
      * @param escape the escape character
      * @return the decoded string
      * @throws NullPointerException           if <code>string</code> is <code>null</code>.
-     * @throws ArrayIndexOutOfBoundsException if not enough character follow an
-     *                                        escape character
      * @throws IllegalArgumentException       if the 2 characters following the escape
-     *                                        character do not represent a hex-number.
+     *                                        character do not represent a hex-number
+     *                                        or if not enough characters follow an
+     *                                        escape character
      */
-    public static String unescape(String string, char escape) {
-        ByteArrayOutputStream out = new ByteArrayOutputStream(string.length());
-        for (int i = 0; i < string.length(); i++) {
-            char c = string.charAt(i);
-            if (c != escape) {
-                out.write(c);
-            } else if (i + 2 < string.length()) {
-                try {
-                    out.write(Integer.parseInt(string.substring(i + 1, i + 3), 16));
-                } catch (NumberFormatException e) {
-                    throw new IllegalArgumentException(
-                            "Escape sequence is not hexadecimal: " + string);
+    public static String unescape(String string, char escape)  {
+        try {
+            byte[] utf8 = string.getBytes("utf-8");
+
+            // Check whether escape occurs at invalid position
+            if ((utf8.length >= 1 && utf8[utf8.length - 1] == escape) ||
+                (utf8.length >= 2 && utf8[utf8.length - 2] == escape)) {
+                throw new IllegalArgumentException("Premature end of escape sequence at end of input");
+            }
+
+            ByteArrayOutputStream out = new ByteArrayOutputStream(utf8.length);
+            for (int k = 0; k < utf8.length; k++) {
+                byte b = utf8[k];
+                if (b == escape) {
+                    out.write((decodeDigit(utf8[++k]) << 4) + decodeDigit(utf8[++k]));
+                }
+                else {
+                    out.write(b);
                 }
-                i += 2;
-            } else {
-                throw new IllegalArgumentException(
-                        "Escape sequence is too short: " + string);
             }
-        }
 
-        try {
             return new String(out.toByteArray(), "utf-8");
-        } catch (UnsupportedEncodingException e) {
+        }
+        catch (UnsupportedEncodingException e) {
             throw new InternalError(e.toString());
         }
     }
@@ -486,7 +487,7 @@
      * <p>Example:<br>
      * A search string like 'test?' will run into a ParseException
      * documented in http://issues.apache.org/jira/browse/JCR-1248
-     * 
+     *
      * @param s the string to encode
      * @return the escaped string
      */
@@ -495,8 +496,8 @@
         sb.append(s.substring(0, (s.length() - 1)));
         char c = s.charAt(s.length() - 1);
         // NOTE: keep this in sync with _ESCAPED_CHAR below!
-        if (c == '!' || c == '(' || c == ':' || c == '^' 
-            || c == '[' || c == ']' || c == '\"' || c == '{' 
+        if (c == '!' || c == '(' || c == ':' || c == '^'
+            || c == '[' || c == ']' || c == '\"' || c == '{'
             || c == '}' || c == '?') {
             sb.append('\\');
         }
@@ -556,7 +557,7 @@
      */
     public static String getName(String path, char delim) {
         return path == null
-                ? null 
+                ? null
                 : path.substring(path.lastIndexOf(delim) + 1);
     }
 
@@ -763,4 +764,20 @@
 
         return result.toString();
     }
+
+    private static byte decodeDigit(byte b) {
+        if (b >= 0x30 && b <= 0x39) {
+            return (byte) (b - 0x30);
+        }
+        else if (b >= 0x41 && b <= 0x46) {
+            return (byte) (b - 0x37);
+        }
+        else if (b >= 0x61 && b <= 0x66) {
+            return (byte) (b - 0x57);
+        }
+        else {
+            throw new IllegalArgumentException("Escape sequence is not hexadecimal: " + (char)b);
+        }
+    }
+
 }

Modified: jackrabbit/trunk/jackrabbit-jcr-commons/src/test/java/org/apache/jackrabbit/util/TextTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-jcr-commons/src/test/java/org/apache/jackrabbit/util/TextTest.java?rev=747096&r1=747095&r2=747096&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-jcr-commons/src/test/java/org/apache/jackrabbit/util/TextTest.java (original)
+++ jackrabbit/trunk/jackrabbit-jcr-commons/src/test/java/org/apache/jackrabbit/util/TextTest.java Mon Feb 23 18:19:40 2009
@@ -16,16 +16,15 @@
  */
 package org.apache.jackrabbit.util;
 
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 import junit.framework.TestCase;
+
 import org.apache.jackrabbit.name.IllegalNameException;
 import org.apache.jackrabbit.name.NameFormat;
 
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.HashMap;
-
 /**
  * Test cases for the Text utility class.
  */
@@ -146,18 +145,46 @@
         }
     }
 
+    public void testUrlEscape() {
+        String testString = "\u4e2d\u56fd\u7684\u7f51\u9875 $% \u20acuro %$ ";
+
+        String escaped = testString
+            .replaceAll("%", "%25")
+            .replaceAll(" ", "%20");
+        String unescaped = Text.unescape(escaped);
+        assertEquals(testString, unescaped);
+
+        escaped = Text.escape(testString);
+        unescaped = Text.unescape(escaped);
+        assertEquals(testString, unescaped);
+
+        assertEquals("%", Text.unescape("%25"));
+        assertEquals("", Text.unescape(""));
+        assertEquals("\u4e2d\u56fd\u7684\u7f51\u9875", Text.unescape("\u4e2d\u56fd\u7684\u7f51\u9875"));
+    }
+
     /**
      * @see <a href="https://issues.apache.org/jira/browse/JCR-1926">JCR-1926</a>
      */
     public void testUnescapeWithInvalidInput() {
         assertInvalidUnescape("%");   // too short
+        assertInvalidUnescape("anything%");   // too short
         assertInvalidUnescape("%%");  // too short
+        assertInvalidUnescape("anything%%");  // too short
+        assertInvalidUnescape("%1");  // too short
+        assertInvalidUnescape("anything%1");  // too short
         assertInvalidUnescape("%%%"); // not a number
+        assertInvalidUnescape("%ag"); // not a number
+        assertInvalidUnescape("anything%%%"); // not a number
+        assertInvalidUnescape("anything%ag"); // not a number
+        assertInvalidUnescape("anything%%%anything"); // not a number
+        assertInvalidUnescape("anything%aganything"); // not a number
     }
-    
+
     private void assertInvalidUnescape(String string) {
         try {
             Text.unescape(string);
+            fail("Text.unescape(" + string + ") should throw IllegalArgumentException");
         } catch (IllegalArgumentException expected) {
         } catch (RuntimeException unexpected) {
             fail("Text.unescape(" + string + "): " + unexpected.getMessage());