You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by Tim Tye <tt...@ticnet.com> on 2001/01/30 17:11:13 UTC

Re: cvs commit: jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets DefaultServlet.java

Since characters in Java are Unicode, what does this code do when it encounters a character whose code point is greater than 0xFF?
My suggestion is to first encode the path as a UTF-8 byte array, then encode the bytes according to this algorithm.
Tim

remm@apache.org wrote:

> remm        01/01/29 19:50:09
>
>   Modified:    catalina/src/share/org/apache/catalina/servlets
>                         DefaultServlet.java
>   Log:
>   - Will now encode all unsafe characters on the URL.
>
>   Revision  Changes    Path
>   1.22      +84 -28    jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java
>
>   Index: DefaultServlet.java
>   ===================================================================
>   RCS file: /home/cvs/jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java,v
>   retrieving revision 1.21
>   retrieving revision 1.22
>   diff -u -r1.21 -r1.22
>   --- DefaultServlet.java       2001/01/25 05:45:40     1.21
>   +++ DefaultServlet.java       2001/01/30 03:50:08     1.22
>   @@ -1,7 +1,7 @@
>    /*
>   - * $Header: /home/cvs/jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java,v 1.21 2001/01/25 05:45:40 remm Exp $
>   - * $Revision: 1.21 $
>   - * $Date: 2001/01/25 05:45:40 $
>   + * $Header: /home/cvs/jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java,v 1.22 2001/01/30 03:50:08 remm Exp $
>   + * $Revision: 1.22 $
>   + * $Date: 2001/01/30 03:50:08 $
>     *
>     * ====================================================================
>     *
>   @@ -76,6 +76,7 @@
>    import java.io.Reader;
>    import java.io.InputStreamReader;
>    import java.io.Writer;
>   +import java.io.OutputStreamWriter;
>    import java.net.MalformedURLException;
>    import java.net.URL;
>    import java.net.URLEncoder;
>   @@ -87,6 +88,7 @@
>    import java.util.Locale;
>    import java.util.TimeZone;
>    import java.util.Hashtable;
>   +import java.util.BitSet;
>    import java.text.ParseException;
>    import java.text.SimpleDateFormat;
>    import java.security.MessageDigest;
>   @@ -119,7 +121,7 @@
>     *
>     * @author Craig R. McClanahan
>     * @author Remy Maucherat
>   - * @version $Revision: 1.21 $ $Date: 2001/01/25 05:45:40 $
>   + * @version $Revision: 1.22 $ $Date: 2001/01/30 03:50:08 $
>     */
>
>    public class DefaultServlet
>   @@ -219,6 +221,40 @@
>         StringManager.getManager(Constants.Package);
>
>
>   +    /**
>   +     * Array containing the safe characters set.
>   +     */
>   +    protected static BitSet safeCharacters;
>   +
>   +
>   +    protected static final char[] hexadecimal =
>   +    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
>   +     'A', 'B', 'C', 'D', 'E', 'F'};
>   +
>   +
>   +    // ----------------------------------------------------- Static Initializer
>   +
>   +
>   +    static {
>   +     safeCharacters = new BitSet(256);
>   +     int i;
>   +     for (i = 'a'; i <= 'z'; i++) {
>   +         safeCharacters.set(i);
>   +     }
>   +     for (i = 'A'; i <= 'Z'; i++) {
>   +         safeCharacters.set(i);
>   +     }
>   +     for (i = '0'; i <= '9'; i++) {
>   +         safeCharacters.set(i);
>   +     }
>   +     safeCharacters.set('-');
>   +     safeCharacters.set('_');
>   +     safeCharacters.set('.');
>   +     safeCharacters.set('*');
>   +     safeCharacters.set('/');
>   +    }
>   +
>   +
>        // --------------------------------------------------------- Public Methods
>
>
>   @@ -853,7 +889,7 @@
>                 replaceChar +
>                 normalized.substring(index + 3);
>            }
>   -
>   +
>         // Normalize the slashes and add leading slash if necessary
>         if (normalized.indexOf('\\') >= 0)
>             normalized = normalized.replace('\\', '/');
>   @@ -902,29 +938,49 @@
>         * @param path Path which has to be rewiten
>         */
>        protected String rewriteUrl(String path) {
>   -
>   -        String normalized = path;
>   -
>   -     // Replace " " with "%20"
>   -        while (true) {
>   -         int index = normalized.indexOf(" ");
>   -         if (index < 0)
>   -             break;
>   -         normalized = normalized.substring(0, index) + "%20"
>   -             + normalized.substring(index + 1);
>   -     }
>   -
>   -     // Replace "&" with "%26"
>   -        while (true) {
>   -         int index = normalized.indexOf("&");
>   -         if (index < 0)
>   -             break;
>   -         normalized = normalized.substring(0, index) + "%26"
>   -             + normalized.substring(index + 1);
>   -     }
>   -
>   -        return normalized;
>   -
>   +
>   +        /**
>   +         * Note: This code portion is very similar to URLEncoder.encode.
>   +         * Unfortunately, there is no way to specify to the URLEncoder which
>   +         * characters should be encoded. Here, ' ' should be encoded as "%20"
>   +         * and '/' shouldn't be encoded.
>   +         */
>   +
>   +     int maxBytesPerChar = 10;
>   +        int caseDiff = ('a' - 'A');
>   +        StringBuffer rewrittenPath = new StringBuffer(path.length());
>   +     ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
>   +        OutputStreamWriter writer = new OutputStreamWriter(buf);
>   +
>   +        for (int i = 0; i < path.length(); i++) {
>   +            int c = (int) path.charAt(i);
>   +            if (safeCharacters.get(c)) {
>   +                rewrittenPath.append((char)c);
>   +            } else {
>   +                // convert to external encoding before hex conversion
>   +                try {
>   +                    writer.write(c);
>   +                    writer.flush();
>   +                } catch(IOException e) {
>   +                    buf.reset();
>   +                    continue;
>   +                }
>   +                byte[] ba = buf.toByteArray();
>   +                for (int j = 0; j < ba.length; j++) {
>   +                    // Converting each byte in the buffer
>   +                    byte toEncode = ba[j];
>   +                    rewrittenPath.append('%');
>   +                    int low = (int) (toEncode & 0x0f);
>   +                    int high = (int) ((toEncode & 0xf0) >> 4);
>   +                    rewrittenPath.append(hexadecimal[high]);
>   +                    rewrittenPath.append(hexadecimal[low]);
>   +                }
>   +                buf.reset();
>   +            }
>   +        }
>   +
>   +        return rewrittenPath.toString();
>   +
>        }
>
>
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: tomcat-dev-unsubscribe@jakarta.apache.org
> For additional commands, email: tomcat-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets DefaultServlet.java

Posted by Remy Maucherat <re...@apache.org>.
> Since characters in Java are Unicode, what does this code do when it
> encounters a character whose code point is greater than 0xFF?
> My suggestion is to first encode the path as a UTF-8 byte array, then
> encode the bytes according to this algorithm.

Yes, the writer should probably use the "UTF-8" encoding.

Remy