You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2017/07/27 18:45:49 UTC
svn commit: r1803224 - in /tomcat/trunk: java/org/apache/tomcat/util/buf/LocalStrings.properties java/org/apache/tomcat/util/buf/UDecoder.java test/org/apache/tomcat/util/buf/TestUDecoder.java webapps/docs/changelog.xml

Author: markt
Date: Thu Jul 27 18:45:49 2017
New Revision: 1803224

URL: http://svn.apache.org/viewvc?rev=1803224&view=rev
Log:
Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=61351
Correctly handle %nn decoding of URL patterns in web.xml and similar locations that may legitimately contain characters that are not permitted by RFC 3986.

Modified:
    tomcat/trunk/java/org/apache/tomcat/util/buf/LocalStrings.properties
    tomcat/trunk/java/org/apache/tomcat/util/buf/UDecoder.java
    tomcat/trunk/test/org/apache/tomcat/util/buf/TestUDecoder.java
    tomcat/trunk/webapps/docs/changelog.xml

Modified: tomcat/trunk/java/org/apache/tomcat/util/buf/LocalStrings.properties
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/LocalStrings.properties?rev=1803224&r1=1803223&r2=1803224&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/util/buf/LocalStrings.properties (original)
+++ tomcat/trunk/java/org/apache/tomcat/util/buf/LocalStrings.properties Thu Jul 27 18:45:49 2017
@@ -19,7 +19,8 @@ c2bConverter.recycleFailed=Failed to rec
 hexUtils.fromHex.oddDigits=The input must consist of an even number of hex digits
 hexUtils.fromHex.nonHex=The input must consist only of hex digits
 
-uDecoder.urlDecode.missingDigit=The % character must be followed by two hexademical digits
+uDecoder.urlDecode.conversionError=Failed to decode [{0}] using character set [{1}]
+uDecoder.urlDecode.missingDigit=Failed to decode [{0}] because the % character must be followed by two hexadecimal digits
 uDecoder.convertHexDigit.notHex=[{0}] is not a hexadecimal digit
 
 byteBufferUtils.cleaner=Cannot use direct ByteBuffer cleaner, memory leaking may occur

Modified: tomcat/trunk/java/org/apache/tomcat/util/buf/UDecoder.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/UDecoder.java?rev=1803224&r1=1803223&r2=1803224&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/util/buf/UDecoder.java (original)
+++ tomcat/trunk/java/org/apache/tomcat/util/buf/UDecoder.java Thu Jul 27 18:45:49 2017
@@ -16,8 +16,10 @@
  */
 package org.apache.tomcat.util.buf;
 
+import java.io.ByteArrayOutputStream;
 import java.io.CharConversionException;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 
@@ -317,39 +319,68 @@ public final class UDecoder {
             return null;
         }
 
-        byte[] bytes = str.getBytes(StandardCharsets.US_ASCII);
+        int index = str.indexOf('%');
+        if (index == -1) {
+            // No %nn sequences, so return string unchanged
+            return str;
+        }
 
         if (charset == null) {
             charset = StandardCharsets.UTF_8;
         }
 
-        int len = bytes.length;
+        /*
+         * Decoding is required.
+         *
+         * Potential complications:
+         * - The source String may be partially decoded so it is not valid to
+         *   assume that the source String is ASCII.
+         * - Have to process as characters since there is no guarantee that the
+         *   byte sequence for '%' is going to be the same in all character
+         *   sets.
+         * - We don't know how many '%nn' sequences are required for a single
+         *   character. It varies between character sets and some use a variable
+         *   length.
+         */
+
+        // This isn't perfect but it is a reasonable guess for the size of the
+        // array required
+        ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2);
+
+        OutputStreamWriter osw = new OutputStreamWriter(baos, charset);
+
+        char[] sourceChars = str.toCharArray();
+        int len = sourceChars.length;
         int ix = 0;
-        int ox = 0;
-        while (ix < len) {
-            byte b = bytes[ix++];     // Get byte to test
-            if (b == '%') {
-                if (ix + 2 > len) {
-                    throw new IllegalArgumentException(
-                            sm.getString("uDecoder.urlDecode.missingDigit"));
+
+        try {
+            while (ix < len) {
+                char c = sourceChars[ix++];
+                if (c == '%') {
+                    osw.flush();
+                    if (ix + 2 > len) {
+                        throw new IllegalArgumentException(
+                                sm.getString("uDecoder.urlDecode.missingDigit", str));
+                    }
+                    char c1 = sourceChars[ix++];
+                    char c2 = sourceChars[ix++];
+                    if (isHexDigit(c1) && isHexDigit(c2)) {
+                        baos.write(x2c(c1, c2));
+                    } else {
+                        throw new IllegalArgumentException(
+                                sm.getString("uDecoder.urlDecode.missingDigit", str));
+                    }
+                } else {
+                    osw.append(c);
                 }
-                b = (byte) ((convertHexDigit(bytes[ix++]) << 4)
-                            + convertHexDigit(bytes[ix++]));
             }
-            bytes[ox++] = b;
-        }
-
-        return new String(bytes, 0, ox, charset);
-    }
+            osw.flush();
 
-
-    private static byte convertHexDigit( byte b ) {
-        if ((b >= '0') && (b <= '9')) return (byte)(b - '0');
-        if ((b >= 'a') && (b <= 'f')) return (byte)(b - 'a' + 10);
-        if ((b >= 'A') && (b <= 'F')) return (byte)(b - 'A' + 10);
-        throw new IllegalArgumentException(
-                sm.getString("uDecoder.convertHexDigit.notHex",
-                        Character.valueOf((char)b)));
+            return baos.toString(charset.name());
+        } catch (IOException ioe) {
+            throw new IllegalArgumentException(
+                    sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe);
+        }
     }
 
 

Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUDecoder.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUDecoder.java?rev=1803224&r1=1803223&r2=1803224&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUDecoder.java (original)
+++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUDecoder.java Thu Jul 27 18:45:49 2017
@@ -46,44 +46,59 @@ public class TestUDecoder {
         assertTrue(exception instanceof IllegalArgumentException);
     }
 
+
     @Test
     public void testURLDecodeStringValidIso88591Start() {
-
         String result = UDecoder.URLDecode("%41xxxx", StandardCharsets.ISO_8859_1);
         assertEquals("Axxxx", result);
     }
 
+
     @Test
     public void testURLDecodeStringValidIso88591Middle() {
-
         String result = UDecoder.URLDecode("xx%41xx", StandardCharsets.ISO_8859_1);
         assertEquals("xxAxx", result);
     }
 
+
     @Test
     public void testURLDecodeStringValidIso88591End() {
-
         String result = UDecoder.URLDecode("xxxx%41", StandardCharsets.ISO_8859_1);
         assertEquals("xxxxA", result);
     }
 
+
     @Test
     public void testURLDecodeStringValidUtf8Start() {
         String result = UDecoder.URLDecode("%c3%aaxxxx", StandardCharsets.UTF_8);
         assertEquals("\u00eaxxxx", result);
     }
 
+
     @Test
     public void testURLDecodeStringValidUtf8Middle() {
-
         String result = UDecoder.URLDecode("xx%c3%aaxx", StandardCharsets.UTF_8);
         assertEquals("xx\u00eaxx", result);
     }
 
+
     @Test
     public void testURLDecodeStringValidUtf8End() {
-
         String result = UDecoder.URLDecode("xxxx%c3%aa", StandardCharsets.UTF_8);
         assertEquals("xxxx\u00ea", result);
     }
+
+
+    @Test
+    public void testURLDecodeStringNonAsciiValidNone() {
+        String result = UDecoder.URLDecode("\u00eaxxxx", StandardCharsets.UTF_8);
+        assertEquals("\u00eaxxxx", result);
+    }
+
+
+    @Test
+    public void testURLDecodeStringNonAsciiValidUtf8() {
+        String result = UDecoder.URLDecode("\u00ea%c3%aa", StandardCharsets.UTF_8);
+        assertEquals("\u00ea\u00ea", result);
+    }
 }

Modified: tomcat/trunk/webapps/docs/changelog.xml
URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1803224&r1=1803223&r2=1803224&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Thu Jul 27 18:45:49 2017
@@ -47,9 +47,16 @@
 <section name="Tomcat 9.0.0.M26 (markt)" rtext="in development">
   <subsection name="Catalina">
     <changelog>
-      <fix>Correct a bug in the <code>PushBuilder</code> implementation that
-      meant push URLs containing <code>%nn</code> sequences were not correctly
-      decoded. Identified by FindBugs. (markt)</fix>
+      <fix>
+        Correct a bug in the <code>PushBuilder</code> implementation that
+        meant push URLs containing <code>%nn</code> sequences were not correctly
+        decoded. Identified by FindBugs. (markt)
+      </fix>
+      <fix>
+        <bug>61351</bug>: Correctly handle <code>%nn</code> decoding of URL
+        patterns in web.xml and similar locations that may legitimately
+        contain characters that are not permitted by RFC 3986. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Coyote">



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org