You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by je...@apache.org on 2002/03/29 02:45:49 UTC

cvs commit: jakarta-slide/src/util/org/apache/util GenericURI.java URIUtil.java

jericho     02/03/28 17:45:49

  Modified:    src/util/org/apache/util GenericURI.java URIUtil.java
  Log:
  - URIUtil and GenericURI are reverted back to the 1.6 and 1.1 revisions.
  - Please ignore the immediately preceding revision.
  
  I will try, as far as I can, not to break any API compatibility with SLIDE_1_0.
  Oops... I didn't notice that they had been changed. I'm very sorry... :(
  
  Revision  Changes    Path
  1.9       +13 -14    jakarta-slide/src/util/org/apache/util/GenericURI.java
  
  Index: GenericURI.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/GenericURI.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- GenericURI.java	28 Mar 2002 06:12:06 -0000	1.8
  +++ GenericURI.java	29 Mar 2002 01:45:49 -0000	1.9
  @@ -1,7 +1,7 @@
   /*
  - * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/GenericURI.java,v 1.8 2002/03/28 06:12:06 jericho Exp $
  - * $Revision: 1.8 $
  - * $Date: 2002/03/28 06:12:06 $
  + * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/GenericURI.java,v 1.9 2002/03/29 01:45:49 jericho Exp $
  + * $Revision: 1.9 $
  + * $Date: 2002/03/29 01:45:49 $
    *
    * ====================================================================
    *
  @@ -29,7 +29,7 @@
    *    Alternately, this acknowlegement may appear in the software itself,
    *    if and wherever such third-party acknowlegements normally appear.
    *
  - * 4. the names "The Jakarta Project", "Slide", and "Apache Software
  + * 4. The names "The Jakarta Project", "Slide", and "Apache Software
    *    Foundation" must not be used to endorse or promote products derived
    *    from this software without prior written permission. For written 
    *    permission, please contact apache@apache.org.
  @@ -80,7 +80,7 @@
    *
    * @author <a href="mailto:jericho@thinkfree.com">Park, Sung-Gu</a>
    */
  -public abstract class URI implements URI, java.io.Serializable {
  +public abstract class GenericURI implements java.io.Serializable {
   
   
       // --------------------------------------------------------- Constructors
  @@ -91,7 +91,7 @@
        *
        * @param escapedURI The escaped URI string.
        */
  -    public URI(String escapedURI) {
  +    public GenericURI(String escapedURI) {
           URI = escapedURI;
       }
   
  @@ -103,7 +103,7 @@
        * @param host The host string.
        * @param port The port number.
        */
  -    public URI(String scheme, String host, int port) {
  +    public GenericURI(String scheme, String host, int port) {
           this(scheme, host, port, null, null);
       }
   
  @@ -115,7 +115,7 @@
        * @param host The host string.
        * @param path The path string.
        */
  -    public URI(String scheme, String host, String path) {
  +    public GenericURI(String scheme, String host, String path) {
           this(scheme, host, -1, path, null);
       }
   
  @@ -128,7 +128,7 @@
        * @param port The port number.
        * @param path The path string.
        */
  -    public URI(String scheme, String host, int port, String path) {
  +    public GenericURI(String scheme, String host, int port, String path) {
           this(scheme, host, port, path, null);
       }
   
  @@ -141,7 +141,7 @@
        * @param path The path string.
        * @param query The query string.
        */
  -    public URI(String scheme, String host, String path, String query) {
  +    public GenericURI(String scheme, String host, String path, String query) {
           this(scheme, host, -1, path, null);
       }
   
  @@ -155,7 +155,7 @@
        * @param path The path string.
        * @param query The query string.
        */
  -    public URI(String scheme, String host, int port, String path,
  +    public GenericURI(String scheme, String host, int port, String path,
                         String query) {
           
           URI = URIUtil.escape(scheme, URIUtil.schemeReserved()) + "://" +
  @@ -177,7 +177,6 @@
        * The URI to be escaped must be saved and processed.
        */
       private String URI;
  -    protected char[] uri = null;
   
   
       /**
  @@ -958,10 +957,10 @@
        */
       public boolean equals(Object obj) {
   
  -        if ((obj != null) && (obj instanceof URI)) {
  +        if ((obj != null) && (obj instanceof GenericURI)) {
   
               try {
  -                URI URI = (URI) obj;
  +                GenericURI URI = (GenericURI) obj;
                   if (getEscapedScheme().equalsIgnoreCase
                       (URI.getEscapedScheme()) &&
                       getEscapedHost().equalsIgnoreCase
  
  
  
  1.3       +267 -579  jakarta-slide/src/util/org/apache/util/URIUtil.java
  
  Index: URIUtil.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/URIUtil.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- URIUtil.java	28 Mar 2002 06:12:06 -0000	1.2
  +++ URIUtil.java	29 Mar 2002 01:45:49 -0000	1.3
  @@ -1,12 +1,13 @@
   /*
  - * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URIUtil.java,v 1.2 2002/03/28 06:12:06 jericho Exp $
  - * $Revision: 1.2 $
  - * $Date: 2002/03/28 06:12:06 $
  + * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URIUtil.java,v 1.3 2002/03/29 01:45:49 jericho Exp $
  + * $Revision: 1.3 $
  + * $Date: 2002/03/29 01:45:49 $
  + *
    * ====================================================================
    *
    * The Apache Software License, Version 1.1
    *
  - * Copyright (c) 1999 The Apache Software Foundation.  All rights
  + * Copyright (c) 1999 The Apache Software Foundation.  All rights 
    * reserved.
    *
    * Redistribution and use in source and binary forms, with or without
  @@ -14,7 +15,7 @@
    * are met:
    *
    * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  + *    notice, this list of conditions and the following disclaimer. 
    *
    * 2. Redistributions in binary form must reproduce the above copyright
    *    notice, this list of conditions and the following disclaimer in
  @@ -22,15 +23,15 @@
    *    distribution.
    *
    * 3. The end-user documentation included with the redistribution, if
  - *    any, must include the following acknowlegement:
  - *       "This product includes software developed by the
  + *    any, must include the following acknowlegement:  
  + *       "This product includes software developed by the 
    *        Apache Software Foundation (http://www.apache.org/)."
    *    Alternately, this acknowlegement may appear in the software itself,
    *    if and wherever such third-party acknowlegements normally appear.
    *
  - * 4. The names "The Jakarta Project", "HttpClient", and "Apache Software
  + * 4. The names "The Jakarta Project", "Slide", and "Apache Software
    *    Foundation" must not be used to endorse or promote products derived
  - *    from this software without prior written permission. For written
  + *    from this software without prior written permission. For written 
    *    permission, please contact apache@apache.org.
    *
    * 5. Products derived from this software may not be called "Apache"
  @@ -58,688 +59,375 @@
    *
    * [Additional notices, if required by prior licensing conditions]
    *
  - */
  + */ 
   
  -package org.apache.commons.httpclient;
  +package org.apache.util;
   
   import java.io.UnsupportedEncodingException;
  +import java.io.ByteArrayOutputStream;
  +import java.io.OutputStreamWriter;
  +import java.io.IOException;
   import java.util.BitSet;
   
  +
   /**
  - * <p>
  - * General purpose methods for encoding URI's, as described in
  - * <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>.
  - * </p>
  - * <p>
  - * This class provides a number of methods useful for encoding and
  - * decoding the "%HH" format, as used in various HTTP related
  - * formats such as URIs and the <nobr>x-www-form-urlencoded</nobr>
  - * MIME type.
  - * It can be seen as a more flexible (and more robust) form
  - * of the core JDK {@link java.net.URLEncoder} and
  - * {@link java.net.URLDecoder} classes.
  - * </p>
  + * General purpose escaping and unescaping utility methods.
  + * For "character encoding", The whole escaped characters must be done.
  + * It's different between "character encoding" and "escaping of characters".
  + *
  + * NOTICE: In order to do URI escaping, using the reserved characters defined
  + * in this class is not recommended for the the specific protocol.
    *
    * @author Craig R. McClanahan
    * @author Tim Tye
    * @author Remy Maucherat
    * @author Park, Sung-Gu
  - * @author Rodney Waldhoff
  - * @version $Revision: 1.2 $ $Date: 2002/03/28 06:12:06 $
  + * @version $Revision: 1.3 $ $Date: 2002/03/29 01:45:49 $
  + * @see <a href=http://www.ietf.org/rfc/rfc2396.txt?number=2396>RFC 2396</a>
    */
   
   public class URIUtil {
   
  -    // --------------------------------------------------------- Public Methods
  +    // -------------------------------------------------------------- Constants
   
  +    
       /**
  -     * Unescape the given {@link String}, converting all <tt>%HH</tt>
  -     * sequences into the UTF-8 character <tt>0xHH</tt>.
  -     *
  -     * @param str the escaped {@link String}
  -     * @exception IllegalArgumentException if a '%'
  -     *            character is not followed by a
  -     *            valid 2-digit hexadecimal number
  -     */
  -    public static final String decode(String str) {
  -            return decode(str,false);
  -    }
  +     * Array containing the ASCII expression for hexadecimal.
  +     */
  +    private static final char[] hexadecimal =
  +    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 
  +     'A', 'B', 'C', 'D', 'E', 'F'};
  +
  +
  +    // ----------------------------------------------------- Instance Variables
  +
   
       /**
  -     * Unescape the given {@link String}, converting all <tt>%HH</tt>
  -     * sequences into the UTF-8 character <tt>0xHH</tt>.
  -     * <p>
  -     * When <i>plusIsSpace</i> is true, <tt>'+'</tt> will
  -     * be converted into <tt>' '</tt> (space),
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param str the escaped {@link String}
  -     * @exception IllegalArgumentException if a '%'
  -     *            character is not followed by a
  -     *            valid 2-digit hexadecimal number
  +     * Array containing the alphanum URI character set.
        */
  -    public static final String decode(String str, boolean plusIsSpace) {
  -        try {
  -            return (str == null) ? null : decode(str.getBytes(),null,plusIsSpace);
  -        } catch(UnsupportedEncodingException e) {
  -            throw new RuntimeException("Default encoding not supported !?!");
  -        }
  -    }
  +    private static BitSet alphanum;
  +
   
       /**
  -     * Unescape the given byte array by first converting all
  -     * <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt> sequences into
  -     * the byte <tt>0xHH</tt>, and then converting the bytes
  -     * into characters using the default encoding.
  -     *
  -     * @param bytes the escaped byte array, which
  -     *              <i>may be changed</i> by this
  -     *              call
  -     * @exception IllegalArgumentException if a <tt>'%'</tt>
  -     *            byte is not followed by a valid
  -     *            2-digit hexadecimal number (as bytes)
  +     * Array containing the reserved URI character set of the scheme part.
        */
  -    public static final String decode(byte[] bytes) {
  -        try {
  -            return decode(bytes,null,false);
  -        } catch(UnsupportedEncodingException e) {
  -            throw new RuntimeException("Default encoding not supported !?!");
  -        }
  -    }
  +    private static BitSet schemeReserved;
  +
   
       /**
  -     * Unescape the given byte array by first converting all
  -     * <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt> sequences into
  -     * the byte <tt>0xHH</tt>, and then converting the bytes
  -     * into characters using the default encoding.
  -     * <p>
  -     * When <i>plusIsSpace</i> is true, <tt>'+'</tt> will
  -     * be converted into <tt>' '</tt> (space),
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param bytes the escaped byte array, which
  -     *              <i>may be changed</i> by this
  -     *              call
  -     * @exception IllegalArgumentException if a <tt>'%'</tt>
  -     *            byte is not followed by a valid
  -     *            2-digit hexadecimal number (as bytes)
  +     * Array containing the reserved URI character set of the authority part.
        */
  -    public static final String decode(byte[] bytes, boolean plusIsSpace) {
  -        try {
  -            return decode(bytes,null,plusIsSpace);
  -        } catch(UnsupportedEncodingException e) {
  -            throw new RuntimeException("Default encoding not supported !?!");
  -        }
  -    }
  +    private static BitSet authorityReserved;
  +
   
       /**
  -     * Unescape a fragment of the given byte array, by
  -     * first converting all <tt>'%'</tt>, <tt>'H'</tt>,
  -     * <tt>'H'</tt> sequences into the byte
  -     * <tt>0xHH</tt>, and then converting the bytes
  -     * into characters using the default encoding.
  -     *
  -     * @param bytes the escaped byte array, which
  -     *              <i>may be changed</i> by this
  -     *              call
  -     * @param off the index of the first byte to convert
  -     * @param len the number of unescaped bytes to convert
  -     * @exception IllegalArgumentException if a <tt>'%'</tt>
  -     *            byte is not followed by a valid
  -     *            2-digit hexadecimal number (as bytes)
  +     * Array containing the reserved URI character set of the userinfo part.
        */
  -    public static final String decode(byte[] bytes, int off, int len) {
  -        try {
  -            return decode(bytes, off, len, null, false);
  -        } catch(UnsupportedEncodingException e) {
  -            throw new RuntimeException("Default encoding not supported !?!");
  -        }
  -    }
  +    private static BitSet userinfoReserved;
  +
   
       /**
  -     * Unescape a fragment of the given byte array, by
  -     * first converting all <tt>'%'</tt>, <tt>'H'</tt>,
  -     * <tt>'H'</tt> sequences into the byte
  -     * <tt>0xHH</tt>, and then converting the bytes
  -     * into characters using the default encoding.
  -     * <p>
  -     * When <i>plusIsSpace</i> is true, <tt>'+'</tt> will
  -     * be converted into <tt>' '</tt> (space),
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param bytes the escaped byte array, which
  -     *              <i>may be changed</i> by this
  -     *              call
  -     * @param off the index of the first byte to convert
  -     * @param len the number of unescaped bytes to convert
  -     * @exception IllegalArgumentException if a <tt>'%'</tt>
  -     *            byte is not followed by a valid
  -     *            2-digit hexadecimal number (as bytes)
  +     * Array containing the reserved URI character set of the host part.
        */
  -    public static final String decode(byte[] bytes, int off, int len, boolean plusIsSpace) {
  -        try {
  -            return decode(bytes, off, len, null, plusIsSpace);
  -        } catch(UnsupportedEncodingException e) {
  -            throw new RuntimeException("Default encoding not supported !?!");
  -        }
  -    }
  +    private static BitSet hostReserved;
   
  -    /**
  -     * Unescape the given byte array by first converting all
  -     * <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt> sequences into
  -     * the byte <tt>0xHH</tt>, and then converting the bytes
  -     * into characters using the default encoding.
  -     *
  -     * @param bytes the escaped byte array, which
  -     *              <i>may be changed</i> by this
  -     *              call
  -     * @param enc the encoding to use
  -     * @exception IllegalArgumentException if a <tt>'%'</tt>
  -     *            byte is not followed by a valid
  -     *            2-digit hexadecimal number (as bytes)
  -     */
  -    public static final String decode(byte[] bytes, String enc) throws UnsupportedEncodingException {
  -        return decode(bytes, 0, bytes.length, enc, false);
  -    }
   
       /**
  -     * Unescape the given byte array by first converting all
  -     * <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt> sequences into
  -     * the byte <tt>0xHH</tt>, and then converting the bytes
  -     * into characters using the default encoding.
  -     * <p>
  -     * When <i>plusIsSpace</i> is true, <tt>'+'</tt> will
  -     * be converted into <tt>' '</tt> (space),
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param bytes the escaped byte array, which
  -     *              <i>may be changed</i> by this
  -     *              call
  -     * @param enc the encoding to use
  -     * @exception IllegalArgumentException if a <tt>'%'</tt>
  -     *            byte is not followed by a valid
  -     *            2-digit hexadecimal number (as bytes)
  -     */
  -    public static final String decode(byte[] bytes, String enc, boolean plusIsSpace) throws UnsupportedEncodingException {
  -        return decode(bytes, 0, bytes.length, enc, plusIsSpace);
  -    }
  +     * Array containing the reserved URI character set of the path part.
  +     */
  +    private static BitSet pathReserved;
   
  -    /**
  -     * Unescape a fragment of the given byte array by first
  -     * converting all <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt>
  -     * sequences into the byte <tt>0xHH</tt>, and then
  -     * converting the bytes into characters using the
  -     * specified encoding.
  -     *
  -     * @param bytes the escaped byte array
  -     * @param off the index of the first byte to convert
  -     * @param len the number of escaped bytes to convert
  -     * @param enc the encoding to use
  -     * @exception IllegalArgumentException if a '%'
  -     *            character is not followed by a
  -     *            valid 2-digit hexadecimal number
  -     */
  -    public static final String decode(byte[] bytes, int off, int len, String enc) throws UnsupportedEncodingException {
  -        return decode(bytes, 0, bytes.length, enc, false);
  -    }
   
       /**
  -     * Unescape a fragment of the given byte array by first
  -     * converting all <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt>
  -     * sequences into the byte <tt>0xHH</tt>, and then
  -     * converting the bytes into characters using the
  -     * specified encoding.
  -     * <p>
  -     * When <i>plusIsSpace</i> is true, a
  -     * <tt>'+'</tt> byte will be converted into <tt>' '</tt>,
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param bytes the escaped byte array
  -     * @param off the index of the first byte to convert
  -     * @param len the number of escaped bytes to convert
  -     * @param enc the encoding to use
  -     * @exception IllegalArgumentException if a '%'
  -     *            character is not followed by a
  -     *            valid 2-digit hexadecimal number
  -     */
  -    public static final String decode(byte[] bytes, int off, int len, String enc, boolean plusIsSpace) throws UnsupportedEncodingException {
  -        if(null == bytes) { return null; }
  -        int end = off + len;
  -        int ix = off;
  -        int ox = off;
  -        while (ix < end) {
  -            byte b = bytes[ix++];     // Get byte to test
  -            if(plusIsSpace && b == '+') {
  -                b = (byte) ' ';
  -            } else if(b == '%') {
  -                b = (byte) ((convertHexDigit(bytes[ix++]) << 4)
  -                            + convertHexDigit(bytes[ix++]));
  -            }
  -            bytes[ox++] = b;
  +     * Array containing the reserved URI character set of the query.
  +     */
  +    private static BitSet queryReserved;
  +
  +
  +    // ----------------------------------------------------- Static Initializer
  +
  +
  +    static {
  +
  +        // Save the alphanum URI characters that is common to do URI escaping.
  +        alphanum = new BitSet(128);
  +        for (int i = 'a'; i <= 'z'; i++) {
  +            alphanum.set(i);
           }
  -        if (enc != null) {
  -            return new String(bytes, off, ox, enc);
  -        } else {
  -            return new String(bytes, off, ox);
  +        for (int i = 'A'; i <= 'Z'; i++) {
  +            alphanum.set(i);
           }
  +        for (int i = '0'; i <= '9'; i++) {
  +            alphanum.set(i);
  +        }
  +
  +        // Save the reserved URI characters within the sheme component.
  +        schemeReserved = new BitSet(128);
  +        /**
  +         * Actually, this should be any combination of lower case letters,
  +         * digits, plus ("+"), period ("."), or hyphen ("-").
  +         * The upper case letters should be treated as equivalent to lower
  +         * case in scheme names.
  +         */
  +        schemeReserved.set('+');
  +        schemeReserved.set('.');
  +        schemeReserved.set('-');
  +
  +        // Save the reserved URI characters within the authority component.
  +        authorityReserved = new BitSet(128);
  +        authorityReserved.set(';');
  +        authorityReserved.set(':');
  +        authorityReserved.set('@');
  +        authorityReserved.set('?');
  +        authorityReserved.set('/');
  +
  +        // Save the reserved URI characters within the userinfo component.
  +        userinfoReserved = new BitSet(128);
  +        userinfoReserved.set(';');
  +        userinfoReserved.set(':');
  +        userinfoReserved.set('&');
  +        userinfoReserved.set('=');
  +        userinfoReserved.set('+');
  +        userinfoReserved.set('$');
  +        userinfoReserved.set(',');
  +
  +        // Save the reserved URI characters within the host component.
  +        hostReserved = new BitSet(128);
  +        hostReserved.set('.');
  +        hostReserved.set('-');
  +
  +        // Save the reserved URI characters within the path component.
  +        pathReserved = new BitSet(128);
  +        pathReserved.set('/');
  +        pathReserved.set(';');
  +        pathReserved.set('=');
  +        pathReserved.set('?');
  +
  +        // Save the reserved URI characters within the query component.
  +        queryReserved = new BitSet(128);
  +        queryReserved.set(';');
  +        queryReserved.set('/');
  +        queryReserved.set('?');
  +        queryReserved.set(':');
  +        queryReserved.set('@');
  +        queryReserved.set('&');
  +        queryReserved.set('=');
  +        queryReserved.set('+');
  +        queryReserved.set(',');
  +        queryReserved.set('$');
  +
       }
   
  -    // ------------------------------------------------------------------------
  +
  +    // ------------------------------------------------------------ Properties
  +
   
       /**
  -     * Escape the given {@link String} by replacing
  -     * all characters not in the default
  -     * set of safe characters ({@link #unreserved()})
  -     * into the sequence %HH (where HH is the hex
  -     * value of the character in the default encoding).
  -     *
  -     * @param str The unescaped string
  +     * Get the reserved URI character set of alphanum.
        */
  -    public static final String encode(String str) {
  -        return encode(str, null, false);
  +    public static BitSet alphanum() {
  +        return alphanum;
       }
  +    
   
       /**
  -     * Escape the given {@link String} by replacing
  -     * all characters not in the default
  -     * set of safe characters ({@link #unreserved()})
  -     * into the sequence %HH (where HH is the hex
  -     * value of the character in the default encoding).
  -     * <p>
  -     * When <i>spaceAsPlus</i> is true and <tt>' '</tt>
  -     * needs to be encoded (i.e., it is not a safe byte),
  -     * then it will be converted to <tt>'+'</tt>,
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param str The unescaped string
  +     * Get the reserved URI character set of the scheme component.
        */
  -    public static final String encode(String str, boolean spaceAsPlus) {
  -        return encode(str, null, spaceAsPlus);
  +    public static BitSet schemeReserved() {
  +        return schemeReserved;
       }
   
  +
       /**
  -     * Escape the given {@link String} by replacing
  -     * all characters not in the given
  -     * set of safe characters into the sequence
  -     * %HH (where HH is the hex value of the
  -     * character in the default encoding).
  -     *
  -     * @param str The unescaped string
  -     * @param safe The set of "safe" characters (not to be escaped)
  +     * Get the reserved URI character set of the authority component.
        */
  -    public static final String encode(String str, BitSet safe) {
  -        return (str == null) ? null : encode(str.getBytes(), safe, false);
  +    public static BitSet authorityReserved() {
  +        return authorityReserved;
       }
   
  +
       /**
  -     * Escape the given {@link String} by replacing
  -     * all characters not in the given
  -     * set of safe characters into the sequence
  -     * %HH (where HH is the hex value of the
  -     * character in the default encoding).
  -     * <p>
  -     * When <i>spaceAsPlus</i> is true and <tt>' '</tt>
  -     * needs to be encoded (i.e., it is not a safe byte),
  -     * then it will be converted to <tt>'+'</tt>,
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param str The unescaped string
  -     * @param safe The set of "safe" characters (not to be escaped)
  +     * Get the reserved URI character set of the userinfo component.
        */
  -    public static final String encode(String str, BitSet safe, boolean spaceAsPlus) {
  -        return (str == null) ? null : encode(str.getBytes(), safe, spaceAsPlus);
  +    public static BitSet userinfoReserved() {
  +        return userinfoReserved;
       }
   
  +
       /**
  -     * Escape the given byte array by first converting all
  -     * bytes not in the given set of "safe" bytes to
  -     * the sequence <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt>
  -     * (where HH is the hex value of the byte),
  -     * and then converting the bytes
  -     * into characters using the default encoding.
  -     *
  -     * @param bytes the unescaped bytes
  -     * @param safe the set of "safe" bytes (not to be escaped)
  +     * Get the reserved URI character set of the host component.
        */
  -    public static final String encode(byte[] bytes, BitSet safe) {
  -        return (bytes == null) ? null : encode(bytes, 0, bytes.length, safe, false);
  +    public static BitSet hostReserved() {
  +        return hostReserved;
       }
   
  +
       /**
  -     * Escape the given byte array by first converting all
  -     * bytes not in the given set of "safe" bytes to
  -     * the sequence <tt>'%'</tt>, <tt>'H'</tt>, <tt>'H'</tt>
  -     * (where HH is the hex value of the byte),
  -     * and then converting the bytes
  -     * into characters using the default encoding.
  -     * <p>
  -     * When <i>spaceAsPlus</i> is true and <tt>' '</tt>
  -     * needs to be encoded (i.e., it is not a safe byte),
  -     * then it will be converted to <tt>'+'</tt>,
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param bytes the unescaped bytes
  -     * @param safe the set of "safe" bytes (not to be escaped)
  +     * Get the reserved URI character set of the path component.
        */
  -    public static final String encode(byte[] bytes, BitSet safe, boolean spaceAsPlus) {
  -        return (bytes == null) ? null : encode(bytes, 0, bytes.length, safe, spaceAsPlus);
  +    public static BitSet pathReserved() {
  +        return pathReserved;
       }
   
  -    /**
  -     * Escape a fragment of the given byte array by first
  -     * converting all bytes not in the given set of
  -     * "safe" bytes to the sequence <tt>'%'</tt>,
  -     * <tt>'H'</tt>, <tt>'H'</tt>
  -     * (where HH is the hex value of the byte),
  -     * and then converting the bytes
  -     * into characters using the default encoding.
  -     *
  -     * @param bytes The unescaped bytes
  -     * @param off the index of the first byte to convert
  -     * @param len the number of unescaped bytes to convert
  -     * @param safe The set of "safe" bytes (not to be escaped)
  -     */
  -    public static final String encode(byte[] bytes, int off, int len, BitSet safe) {
  -        return (bytes == null) ? null : encode(bytes, 0, bytes.length, safe, false);
  -    }
   
       /**
  -     * Escape a fragment of the given byte array by first
  -     * converting all bytes not in the given set of
  -     * "safe" bytes to the sequence <tt>'%'</tt>,
  -     * <tt>'H'</tt>, <tt>'H'</tt>
  -     * (where HH is the hex value of the byte),
  -     * and then converting the bytes
  -     * into characters using the default encoding.
  -     * <p>
  -     * When <i>spaceAsPlus</i> is true and <tt>' '</tt>
  -     * needs to be encoded (i.e., it is not a safe byte),
  -     * then it will be converted to <tt>'+'</tt>,
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  -     *
  -     * @param bytes The unescaped bytes
  -     * @param off the index of the first byte to convert
  -     * @param len the number of unescaped bytes to convert
  -     * @param safe The set of "safe" bytes (not to be escaped)
  -     */
  -    public static final String encode(byte[] bytes, int off, int len, BitSet safe, boolean spaceAsPlus) {
  -        if(null == bytes) { return null; }
  -        if(null == safe) { safe = unreserved; }
  -        StringBuffer rewrittenStr = new StringBuffer(len);
  -        for (int i = off; i < len; i++) {
  -            char c = (char) bytes[i];
  -            if(safe.get(c)) {
  -                rewrittenStr.append(c);
  -            } else {
  -                if(spaceAsPlus && ' ' == c) {
  -                    rewrittenStr.append('+');
  -                } else {
  -                    byte toEscape = bytes[i];
  -                    rewrittenStr.append('%');
  -                    int low = (int) (toEscape & 0x0f);
  -                    int high = (int) ((toEscape & 0xf0) >> 4);
  -                    rewrittenStr.append(hexadecimal[high]);
  -                    rewrittenStr.append(hexadecimal[low]);
  -                }
  -            }
  -        }
  -        return rewrittenStr.toString();
  +     * Get the reserved URI character set of the query component.
  +     */
  +    public static BitSet queryReserved() {
  +        return queryReserved;
       }
   
  -    /**
  -     * Escape the given {@link String}, first converting
  -     * the {@link String} to bytes using the specified
  -     * encoding, then replacing all bytes not in the given
  -     * set of safe bytes into the sequence %HH (where HH is
  -     * the hex value of the byte).
  -     *
  -     * @param str the unescaped string
  -     * @param safe the set of "safe" characters (not to be escaped)
  -     * @param enc the encoding to use
  -     */
  -    public static final String encode(String str, BitSet safe, String enc) {
  -        return encode(str,safe,enc,false);
  -    }
  +
  +    // -------------------------------------------------------- Private Methods
  +
   
       /**
  -     * Escape the given {@link String}, first converting
  -     * the {@link String} to bytes using the specified
  -     * encoding, then replacing all bytes not in the given
  -     * set of safe bytes into the sequence %HH (where HH is
  -     * the hex value of the byte).
  -     * <p>
  -     * When <i>spaceAsPlus</i> is true and <tt>' '</tt>
  -     * needs to be encoded (i.e., it is not a safe byte),
  -     * then it will be converted to <tt>'+'</tt>,
  -     * as is used in the <nobr>x-www-form-urlencoded</nobr>
  -     * MIME type.
  +     * Convert a byte character value to hexadecimal digit value.
        *
  -     * @param str the unescaped string
  -     * @param safe the set of "safe" characters (not to be escaped)
  -     * @param enc the encoding to use
  +     * @param b the character value byte
        */
  -    public static final String encode(String str, BitSet safe, String enc, boolean spaceAsPlus) {
  -        try {
  -            return encode(str.getBytes(enc), safe, spaceAsPlus);
  -        } catch (UnsupportedEncodingException e) {
  -            e.printStackTrace();
  -            return encode(str.getBytes(), safe, spaceAsPlus);
  -        }
  +    private static byte convertHexDigit(byte b) {
  +        if ((b >= '0') && (b <= '9')) return (byte)(b - '0');
  +        if ((b >= 'a') && (b <= 'f')) return (byte)(b - 'a' + 10);
  +        if ((b >= 'A') && (b <= 'F')) return (byte)(b - 'A' + 10);
  +        return 0;
       }
   
  -
  +    
  +    // --------------------------------------------------------- Public Methods
  +    
  +    
       /**
  +     * Unescape the escaped URI string.
        *
  +     * @param str The escaped URI string.
  +     * @exception IllegalArgumentException if a '%' character is not followed
  +     * by a valid 2-digit hexadecimal number
        */
  -    public static final String getPath(String url) throws java.net.MalformedURLException
  -    {
  -        String path = new java.net.URL(url).getPath();
  -        if(path.length() == 0){
  -            path = "/";
  -        }
  -        return path;
  +    public static String unescape(String str) {
  +        return (str == null) ? null : unescape(str.getBytes());
       }
  -
  -    // ------------------------------- RFC 2396 Character Sets : Public Methods
  -
  -    public static final BitSet unreserved() {
  -        return unreserved;
  -    }
  -
  -    public static final BitSet pathSafe() {
  -        return pathSafe;
  -    }
  -
  -    public static final BitSet queryStringValueSafe() {
  -        return queryStringValueSafe;
  -    }
  -
  -    // ------------------------------------------------------ Private Constants
  +    
   
       /**
  -     * Array mapping hexadecimal values to the corresponding ASCII characters.
  +     * Unescape the escaped URI string.
  +     *
  +     * @param bytes The escaped URI byte array.
  +     * @exception IllegalArgumentException if a '%' character is not followed
  +     * by a valid 2-digit hexadecimal number
        */
  -    private static final char[] hexadecimal = {
  -        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  -        'A', 'B', 'C', 'D', 'E', 'F'
  -    };
  +    public static String unescape(byte[] bytes) {
  +        return unescape(bytes, null);
  +    }
   
  -    // ---------------------------- RFC 2396 Character Sets : Private Constants
   
       /**
  -     * "Alpha" characters from RFC 2396.
  -     * <pre>alpha = ["a"-"z"] | ["A"-"Z"]</pre>
  +     * Unescape the escaped URI string with character encoding.
  +     *
  +     * @param bytes The escaped URI byte array.
  +     * @param enc The encoding to use.
  +     *            If null or wrong, the default encoding is used.
  +     * @exception IllegalArgumentException if a '%' character is not followed
  +     * by a valid 2-digit hexadecimal number
        */
  -    private static final BitSet alpha = new BitSet(256);
  -    /** Static initializer for {@link #alpha}. */
  -    static {
  -        for(int i='a';i<='z';i++) {
  -            alpha.set(i);
  +    public static String unescape(byte[] bytes, String enc) {
  +        
  +        if (bytes == null)
  +            return (null);
  +        
  +        int len = bytes.length;
  +        int ix = 0;
  +        int ox = 0;
  +        while (ix < len) {
  +            byte b = bytes[ix++];     // Get byte to test
  +            if (b == '+') {
  +                b = (byte) ' ';
  +            } else if (b == '%') {
  +                b = (byte) ((convertHexDigit(bytes[ix++]) << 4)
  +                            + convertHexDigit(bytes[ix++]));
  +            }
  +            bytes[ox++] = b;
           }
  -        for(int i='A';i<='Z';i++) {
  -            alpha.set(i);
  +        if (enc != null) {
  +            try {
  +                return new String(bytes, 0, ox, enc);
  +            } catch (UnsupportedEncodingException e) {
  +                e.printStackTrace();
  +            }
           }
  -    }
   
  -    /**
  -     * "Alphanum" characters from RFC 2396.
  -     * <pre>alphanum = {@link #alpha} | ["0"-"9"]</pre>
  -     */
  -    private static final BitSet alphanum = new BitSet(256);
  -    /** Static initializer for {@link #alphanum}. */
  -    static {
  -        alphanum.or(alpha);
  -        for(int i='0';i<='9';i++) {
  -            alphanum.set(i);
  -        }
  +        return new String(bytes, 0, ox);
       }
   
  -    /**
  -     * "Reserved" characters from RFC 2396.
  -     * <pre>reserved = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" | "$" | ","</pre>
  -     */
  -    private static final BitSet reserved = new BitSet(256);
  -    /** Static initializer for {@link #reserved}. */
  -    static {
  -        reserved.set(';');
  -        reserved.set('/');
  -        reserved.set('?');
  -        reserved.set(':');
  -        reserved.set('@');
  -        reserved.set('&');
  -        reserved.set('=');
  -        reserved.set('+');
  -        reserved.set('$');
  -        reserved.set(',');
  -    }
   
       /**
  -     * "Mark" characters from RFC 2396.
  -     * <pre>mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"</pre>
  +     * Escape the unescaped URI string.
  +     * 
  +     * @param str The unescaped URI string which has to be rewritten.
        */
  -    private static final BitSet mark = new BitSet(256);
  -    /** Static initializer for {@link #mark}. */
  -    static {
  -        mark.set('-');
  -        mark.set('_');
  -        mark.set('.');
  -        mark.set('!');
  -        mark.set('~');
  -        mark.set('*');
  -        mark.set('\'');
  -        mark.set('(');
  -        mark.set(')');
  +    public static String escape(String str) {
  +        return escape(str, null);
       }
   
  -    /**
  -     * "Unreserved" characters from RFC 2396.
  -     * <pre>unreserved = {@link #alphanum} | {@link #mark}</pre>
  -     */
  -    private static final BitSet unreserved = new BitSet(256);
  -    /** Static initializer for {@link #unreserved}. */
  -    static {
  -        unreserved.or(alphanum);
  -        unreserved.or(mark);
  -    }
   
       /**
  -     * "Delims" characters from RFC 2396.
  -     * <pre>delims = "&lt;" | "&gt;" | "#" | "%" | &lt;"&gt;</pre>
  +     * Escape the unescaped URI string.
  +     * 
  +     * @param str The unescaped URI string which has to be rewritten.
  +     * @param reserved The additional reserved URI character set.
        */
  -    private static final BitSet delims = new BitSet(256);
  -    /** Static initializer for {@link #delims}. */
  -    static {
  -        delims.set('<');
  -        delims.set('>');
  -        delims.set('#');
  -        delims.set('%');
  -        delims.set('"');
  +    public static String escape(String str, BitSet reserved) {
  +        return (str == null) ? null : escape(str.getBytes(), reserved);
       }
   
  +
       /**
  -     * "Unwise" characters from RFC 2396.
  -     * <pre>unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"</pre>
  +     * Escape the unescaped URI byte array.
  +     * 
  +     * @param bytes The unescaped URI byte array which has to be rewritten.
  +     * @param reserved The additional reserved URI character set.
        */
  -    private static final BitSet unwise = new BitSet(256);
  -    /** Static initializer for {@link #unwise}. */
  -    static {
  -        unwise.set('{');
  -        unwise.set('}');
  -        unwise.set('|');
  -        unwise.set('\\');
  -        unwise.set('^');
  -        unwise.set('[');
  -        unwise.set(']');
  -        unwise.set('`');
  -    }
  +    public static String escape(byte[] bytes, BitSet reserved) {
  +        
  +        if (bytes == null)
  +            return (null);
  +        
  +        StringBuffer rewrittenStr = new StringBuffer(bytes.length);
   
  -    private static final BitSet pathReserved = new BitSet(256);
  -    /** Static initializer for {@link #pathReserved}. */
  -    static {
  -        pathReserved.set('/');
  -        pathReserved.set(';');
  -        pathReserved.set('=');
  -        pathReserved.set('?');
  -    }
  -
  -    // ------------------------------ "Safe" Character Sets : Private Constants
  -
  -    private static final BitSet pathSafe = new BitSet(256);
  -    static {
  -        pathSafe.or(unreserved);
  -        pathSafe.or(pathReserved);
  -    }
  -
  -    private static final BitSet queryStringValueSafe = new BitSet(256);
  -    static {
  -        queryStringValueSafe.or(unreserved);
  +        for (int i = 0; i < bytes.length; i++) {
  +            char c = (char) bytes[i];
  +            if (alphanum.get(c)) {
  +                rewrittenStr.append(c);
  +            } else if (reserved != null && reserved.get(c)) {
  +                rewrittenStr.append(c);
  +            } else {
  +                byte toEscape = bytes[i];
  +                rewrittenStr.append('%');
  +                int low = (int) (toEscape & 0x0f);
  +                int high = (int) ((toEscape & 0xf0) >> 4);
  +                rewrittenStr.append(hexadecimal[high]);
  +                rewrittenStr.append(hexadecimal[low]);
  +            }
  +        }
  +        
  +        return rewrittenStr.toString();
       }
   
  -    // -------------------------------------------------------- Private Methods
   
       /**
  -     * Convert a byte character value to hexidecimal digit value.
  +     * Escape the unescaped URI string with character encoding.
        *
  -     * @param b the character value byte
  +     * @param str The string which has to be rewritten.
  +     * @param reserved The additional reserved URI character set.
  +     * @param enc The encoding to use.
  +     *            If wrong, the default encoding is used.
        */
  -    private static final byte convertHexDigit(byte b) {
  -        switch(b) {
  -            case (byte)'0':
  -            case (byte)'1':
  -            case (byte)'2':
  -            case (byte)'3':
  -            case (byte)'4':
  -            case (byte)'5':
  -            case (byte)'6':
  -            case (byte)'7':
  -            case (byte)'8':
  -            case (byte)'9':
  -                return (byte)(b - '0');
  -            case (byte)'a':
  -            case (byte)'b':
  -            case (byte)'c':
  -            case (byte)'d':
  -            case (byte)'e':
  -            case (byte)'f':
  -                return (byte)(b - 'a' + 10);
  -            case (byte)'A':
  -            case (byte)'B':
  -            case (byte)'C':
  -            case (byte)'D':
  -            case (byte)'E':
  -            case (byte)'F':
  -                return (byte)(b - 'A' + 10);
  -            default:
  -                throw new IllegalArgumentException(b + " is not a hex value");
  +    public static String escape(String str, BitSet reserved, String enc) {
  +        try {
  +            return escape(str.getBytes(enc), reserved);
  +        } catch (UnsupportedEncodingException e) {
  +            e.printStackTrace();
  +            return escape(str.getBytes(), reserved);
           }
       }
   
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>