You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by je...@apache.org on 2002/04/24 19:24:21 UTC

cvs commit: jakarta-slide/src/util/org/apache/util URI.java

jericho     02/04/24 10:24:21

  Modified:    src/util/org/apache/util URI.java
  Log:
  - Implemented my understanding of URI usage and of character and escape encoding.
  - Note that it is still experimental.  I will write the test case for URI when I get some free time.
  
  Revision  Changes    Path
  1.4       +1274 -237 jakarta-slide/src/util/org/apache/util/URI.java
  
  Index: URI.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- URI.java	30 Mar 2002 10:00:49 -0000	1.3
  +++ URI.java	24 Apr 2002 17:24:21 -0000	1.4
  @@ -1,13 +1,13 @@
   /*
  - * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.3 2002/03/30 10:00:49 jericho Exp $
  - * $Revision: 1.3 $
  - * $Date: 2002/03/30 10:00:49 $
  + * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.4 2002/04/24 17:24:21 jericho Exp $
  + * $Revision: 1.4 $
  + * $Date: 2002/04/24 17:24:21 $
    *
    * ====================================================================
    *
  - * the Apache Software License, Version 1.1
  + * The Apache Software License, Version 1.1
    *
  - * Copyright (c) 1999 the Apache Software Foundation.  All rights 
  + * Copyright (c) 1999-2002 the Apache Software Foundation.  All rights 
    * reserved.
    *
    * Redistribution and use in source and binary forms, with or without
  @@ -63,6 +63,7 @@
   
   package org.apache.util;
   
  +import java.io.IOException;
   import java.io.UnsupportedEncodingException;
   import java.util.BitSet;
   import java.security.AccessController;
  @@ -109,7 +110,7 @@
    *   abs_path      = "/"  path_segments
    * </pre></blockquote><p>
    *
  - * the following examples illustrate URI that are in common use.
  + * The following examples illustrate URI that are in common use.
    * ftp://ftp.is.co.za/rfc/rfc1808.txt
    *    -- ftp scheme for File Transfer Protocol services
    * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
  @@ -127,14 +128,16 @@
    * relative URL(RFC 1808).
    *
    * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
  - * @version $Revision: 1.3 $ $Date: 2002/03/14 15:14:01 
  + * @version $Revision: 1.4 $ $Date: 2002/03/14 15:14:01 
    */
   
  -public class URI implements java.io.Serializable {
  +public class URI implements Comparable, java.io.Serializable {
   
   
       // --------------------------------------------------------- Constructors
   
  +    protected URI() {
  +    }
   
       /**
        * Construct a URI from the given string.
  @@ -142,12 +145,12 @@
        *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
        * </pre></blockquote><p>
        *
  -     * @param str the string to be represented to URI character sequence
  +     * @param original the string to be represented to URI character sequence
        * It is one of absoluteURI and relativeURI.
        * @exception Exception
        */
  -    public URI(String str) throws Exception {
  -        parseUriReference(str);
  +    public URI(String original) throws Exception {
  +        parseUriReference(original);
       }
   
   
  @@ -158,15 +161,28 @@
        *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
        *   opaque_part   = uric_no_slash *uric
        * </pre></blockquote><p>
  -     * In general, absolute URI = <scheme>:<scheme-specific-part>#<fragment>
  +     * It's for absolute URI = <scheme>:<scheme-specific-part>#<fragment>
        *
        * @param scheme the scheme string
        * @param scheme_specific_part scheme_specific_part
        * @param fragment the fragment string
        */
       public URI(String scheme, String scheme_specific_part, String fragment)
  -       throws Exception {
  -       // TODO: validate and contruct the URI character sequence
  +    throws Exception {
  +        // validate and construct the URI character sequence
  +        if (scheme == null) {
  +           throw new IllegalArgumentException("scheme required");
  +        }
  +        char[] s = scheme.toLowerCase().toCharArray();
  +        if (validate(s, this.scheme)) {
  +            _scheme = s; // is_absoluteURI
  +        } else {
  +            throw new IllegalArgumentException("incorrect scheme");
  +        }
  +        _opaque = encode(scheme_specific_part, allowed_opaque_part);
  +        // Set flag
  +        _is_opaque_part = true;
  +        setUriReference();
       }
   
   
  @@ -175,9 +191,11 @@
        * <p><blockquote><pre>
        *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
        *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
  +     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
        *   hier_part     = ( net_path | abs_path ) [ "?" query ]
        * </pre></blockquote><p>
  -     * In general, generic URI = <scheme>://<authority><path>?<query>#<fragment>
  +     * It's for absolute URI = <scheme>:<path>?<query>#<fragment> and
  +     * relative URI = <path>?<query>#<fragment>
        *
        * @param scheme the scheme string
        * @param authority the authority string
  @@ -187,44 +205,141 @@
        */
       public URI(String scheme, String authority, String path, String query,
                  String fragment) throws Exception {
  -       // TODO: validate and contruct the URI character sequence
  +        // validate and construct the URI character sequence
  +        StringBuffer buff = new StringBuffer();
  +        if (scheme != null) {
  +            buff.append(scheme);
  +            buff.append(':');
  +        }
  +        if (authority != null) {
  +            buff.append("//");
  +            buff.append(authority);
  +        }
  +        if (path != null) {  // accept empty path
  +            if ((scheme != null || authority != null)
  +                    && !path.startsWith("/")) {
  +                throw new IllegalArgumentException("abs_path requested");
  +            }
  +            buff.append(path);
  +        }
  +        if (query != null) {
  +            buff.append('?');
  +            buff.append(query);
  +        }
  +        if (fragment != null) {
  +            buff.append('#');
  +            buff.append(fragment);
  +        }
  +        parseUriReference(buff.toString());
       }
   
   
       /**
  -     * Construct a general URI with the given relative URI.
  -     * <p><blockquote><pre>
  -     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  -     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
  -     * </pre></blockquote><p>
  +     * Construct a general URI from the given components.
        *
  -     * @param base the base URI
  -     * @param relative the relative URI
  +     * @param scheme the scheme string
  +     * @param userinfo the userinfo string
  +     * @param host the host string
  +     * @param port the port number
  +     * @param path the path string
  +     * @param query the query string
  +     * @param fragment the fragment string
        */
  -    public URI(URI base, URI relative) throws Exception {
  -       // TODO: validate and contruct the URI character sequence
  +    public URI(String scheme, String userinfo, String host, int port,
  +            String path, String query, String fragment)
  +        throws Exception {
  +        this(scheme, (host == null) ? null :
  +                ((userinfo != null) ? userinfo + '@' : "") + host +
  +                ((port != -1) ? ":" + port : ""), path, query, fragment);
       }
   
   
       /**
  -     * Construct a URI from the given components.
  +     * Construct a general URI from the given components.
        *
        * @param scheme the scheme string
  -     * @param userinfo the userinfo string
        * @param host the host string
  -     * @param port the port number
        * @param path the path string
  -     * @param query the query string
        * @param fragment the fragment string
        */
  -    public URI(String scheme, String userinfo, String host, int port,
  -               String path, String query, String fragment) throws Exception {
  -       // TODO: validate and contruct the URI character sequence
  +    public URI(String scheme, String host, String path, String fragment)
  +    throws Exception {
  +        this(scheme, host, path, null, fragment);
       }
   
   
  +    /**
  +     * Construct a general URI with the given relative URI.
  +     * <p><blockquote><pre>
  +     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  +     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
  +     * </pre></blockquote><p>
  +     * Resolving Relative References to Absolute Form.
  +     *
  +     * Some URI schemes do not allow a hierarchical syntax matching the
  +     * <hier_part> syntax, and thus cannot use relative references.
  +     *
  +     * @param base the base URI
  +     * @param relative the relative URI
  +     */
  +    public URI(URI base, URI relative) throws Exception {
  +        if (base._scheme != null) {
  +            throw new IllegalArgumentException("base URI required");
  +        }
  +        if (relative._scheme != null && // is_relativeURI
  +                !equals(base._scheme, relative._scheme)) {
  +            throw new IllegalArgumentException("not relative URI");
  +        }
  +        if (base._is_opaque_part || relative._is_opaque_part) {
  +            this._scheme = base._scheme;
  +            this._is_opaque_part = relative._is_opaque_part;
  +            this._opaque = relative._opaque;
  +            this._fragment = relative._fragment;
  +            this.setUriReference();
  +            return;
  +        }
  +        if (base._scheme != null) {
  +            this._scheme = base._scheme;
  +        }
  +        if (relative._authority != null) {
  +            this._is_net_path = relative._is_net_path;
  +            this._authority = relative._authority;
  +            if (relative._is_server) {
  +                this._is_server = relative._is_server;
  +                this._userinfo = relative._userinfo;
  +                this._host = relative._host;
  +                this._port = relative._port;
  +            } else if (relative._is_reg_name) {
  +                this._is_reg_name = relative._is_reg_name;
  +            }
  +        } else if (base._authority != null) {
  +            this._is_net_path = base._is_net_path;
  +            this._authority = base._authority;
  +            if (base._is_server) {
  +                this._userinfo = base._userinfo;
  +                this._host = base._host;
  +                this._port = base._port;
  +            } else if (base._is_reg_name) {
  +                this._is_reg_name = base._is_reg_name;
  +            }
  +        }
  +        // resolve the path
  +        this._path = resolvePath(base._path, relative._path);
  +        // base._query removed
  +        if (relative._query != null) {
  +            this._query = relative._query;
  +        }
  +        // base._fragment removed
  +        if (relative._fragment != null) {
  +            this._fragment = relative._fragment;
  +        }
  +        this.setUriReference();
  +    }
  +
       // --------------------------------------------------- Instance Variables
   
  +    static final long serialVersionUID = 604752400577948726L;
  +
   
       /**
        * This Uniform Resource Identifier (URI).
  @@ -237,7 +352,7 @@
       /**
        * The default charset of the protocol.  RFC 2277, 2396
        */
  -    protected static String _protocolCharset = "UTF-8";
  +    protected static String _protocolCharset = "UTF8";
   
   
       /**
  @@ -259,12 +374,24 @@
   
   
       /**
  +     * The opaque.
  +     */
  +    protected char[] _opaque = null;
  +
  +
  +    /**
        * The authority.
        */
       protected char[] _authority = null;
   
   
       /**
  +     * The userinfo.
  +     */
  +    protected char[] _userinfo = null;
  +
  +
  +    /**
        * The host.
        */
       protected char[] _host = null;
  @@ -294,9 +421,26 @@
       protected char[] _fragment = null;
   
   
  +    /**
  +     * The debug.
  +     */
  +    protected static int debug = 0;
  +
       // ---------------------- Generous characters for each component validation
   
       /**
  +     * The percent "%" character always has the reserved purpose of being the
  +     * escape indicator, it must be escaped as "%25" in order to be used as
  +     * data within a URI.
  +     */
  +    protected static final BitSet percent = new BitSet(256);
  +    // Static initializer for percent
  +    static {
  +        percent.set('%');
  +    }
  +
  +
  +    /**
        * <p><blockquote><pre>
        * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
        *            "8" | "9"
  @@ -368,7 +512,7 @@
       protected static final BitSet escaped = new BitSet(256);
       // Static initializer for escaped
       static {
  -        escaped.set('%');
  +        escaped.or(percent);
           escaped.or(hex);
       }
   
  @@ -395,6 +539,8 @@
   
   
       /**
  +     * Data characters that are allowed in a URI but do not have a reserved
  +     * purpose are called unreserved.
        * <p><blockquote><pre>
        * unreserved    = alphanum | mark
        * </pre></blockquote><p>
  @@ -683,9 +829,9 @@
       protected static final BitSet hostport = new BitSet(256);
       // Static initializer for hostport
       static {
  -        host.or(host);
  -        host.set(':');
  -        host.or(port);
  +        hostport.or(host);
  +        hostport.set(':');
  +        hostport.or(port);
       }
   
   
  @@ -719,7 +865,7 @@
       // Static initializer for server
       static {
           server.or(userinfo);
  -        userinfo.set('@');
  +        server.set('@');
           server.or(hostport);
       }
   
  @@ -883,73 +1029,204 @@
           URI_reference.or(fragment);
       }
   
  +    // ---------------------------- Characters disallowed within the URI syntax
  +    // Excluded US-ASCII Characters are like control, space, delims and unwise
  +
  +    /**
  +     * control
  +     */
  +    public static final BitSet control = new BitSet(256);
  +    // Static initializer for control
  +    static {
  +        for (int i = 0; i <= 0x1F; i++) {
  +            control.set(i);
  +        }
  +        control.set(0x7F);
  +    }
  +
  +    /**
  +     * space
  +     */
  +    public static final BitSet space = new BitSet(256);
  +    // Static initializer for space
  +    static {
  +        space.set(0x20);
  +    }
  +
  +
  +    /**
  +     * delims
  +     */
  +    public static final BitSet delims = new BitSet(256);
  +    // Static initializer for delims
  +    static {
  +        delims.set('<');
  +        delims.set('>');
  +        delims.set('#');
  +        delims.set('%');
  +        delims.set('"');
  +    }
  +
  +
  +    /**
  +     * unwise
  +     */
  +    public static final BitSet unwise = new BitSet(256);
  +    // Static initializer for unwise
  +    static {
  +        unwise.set('{');
  +        unwise.set('}');
  +        unwise.set('|');
  +        unwise.set('\\');
  +        unwise.set('^');
  +        unwise.set('[');
  +        unwise.set(']');
  +        unwise.set('`');
  +    }
  +
  +
  +    /**
  +     * disallowed rel_segment before escaping
  +     */
  +    public static final BitSet disallowed_rel_segment = new BitSet(256);
  +    // Static initializer for disallowed_rel_segment
  +    static {
  +        disallowed_rel_segment.or(uric);
  +        disallowed_rel_segment.andNot(rel_segment);
  +    }
  +
  +
  +    /**
  +     * disallowed opaque_part before escaping
  +     */
  +    public static final BitSet disallowed_opaque_part = new BitSet(256);
  +    // Static initializer for disallowed_opaque_part
  +    static {
  +        disallowed_opaque_part.or(uric);
  +        disallowed_opaque_part.andNot(opaque_part);
  +    }
  +
       // ------------------------------- Characters allowed within each component
   
       /**
        * Those characters that are allowed within the authority component.
        */
  -    public static final BitSet allowedAuthority = new BitSet(256);
  -    // Static initializer for allowedAuthority
  +    public static final BitSet allowed_authority = new BitSet(256);
  +    // Static initializer for allowed_authority
       static {
  -        // FIXME: you can verify with validate method.
  -        allowedAuthority.or(unreserved);
  -        allowedAuthority.or(authority);
  +        allowed_authority.or(authority);
  +        allowed_authority.clear('%');
       }
   
   
       /**
  -     * Those characters that are allowed within the path component.
  +     * Those characters that are allowed within the opaque_part.
        */
  -    public static final BitSet allowedPath = new BitSet(256);
  -    // Static initializer for allowedPath
  +    public static final BitSet allowed_opaque_part = new BitSet(256);
  +    // Static initializer for allowed_opaque_part 
       static {
  -        // FIXME: you can verify with validate method.
  -        allowedPath.or(unreserved);
  -        allowedPath.or(path);
  +        allowed_opaque_part.or(opaque_part);
  +        allowed_opaque_part.clear('%');
       }
   
   
       /**
  -     * Those characters that are allowed within the query component.
  +     * Those characters that are allowed within the reg_name.
        */
  -    public static final BitSet allowedQuery = new BitSet(256);
  -    // Static initializer for allowedQuery
  +    public static final BitSet allowed_reg_name = new BitSet(256);
  +    // Static initializer for allowed_reg_name 
       static {
  -        // FIXME: you can verify with validate method.
  -        allowedQuery.or(unreserved);
  +        allowed_reg_name.or(reg_name);
  +        // allowed_reg_name.andNot(percent);
  +        allowed_reg_name.clear('%');
       }
   
   
       /**
  -     * Those characters that are allowed within the fragment component.
  +     * Those characters that are allowed within the userinfo component.
        */
  -    public static final BitSet allowedFragment = new BitSet(256);
  -    // Static initializer for allowedFragment
  +    public static final BitSet allowed_userinfo = new BitSet(256);
  +    // Static initializer for allowed_userinfo
       static {
  -        // FIXME: you can verify with validate method.
  -        allowedFragment.or(unreserved);
  +        allowed_userinfo.or(userinfo);
  +        // allowed_userinfo.andNot(percent);
  +        allowed_userinfo.clear('%');
       }
   
   
       /**
  -     * Those characters that are allowed within the userinfo component.
  -     * <p><blockquote><pre>
  -     * unwise      = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
  -     * </pre></blockquote><p>
  +     * Those characters that are allowed within the IPv6reference component.
  +     * '[', ']' in IPv6reference should be excluded.
  +     */
  +    public static final BitSet allowed_IPv6reference = new BitSet(256);
  +    // Static initializer for allowed_IPv6reference
  +    static {
  +        allowed_IPv6reference.or(IPv6reference);
  +        // allowed_IPv6reference.andNot(unwise);
  +        allowed_IPv6reference.clear('[');
  +        allowed_IPv6reference.clear(']');
  +    }
  +
  +
  +    /**
  +     * Those characters that are allowed within the host component.
  +     * '[', ']' in IPv6reference should be excluded.
  +     */
  +    public static final BitSet allowed_host = new BitSet(256);
  +    // Static initializer for allowed_host
  +    static {
  +        allowed_host.or(hostname);
  +        allowed_host.or(allowed_IPv6reference);
  +    }
  +
  +
  +    /**
  +     * Those characters that are allowed within the abs_path.
        */
  -    public static final BitSet allowedUnwise = new BitSet(256);
  -    // Static initializer for allowedUnwise
  +    public static final BitSet allowed_abs_path = new BitSet(256);
  +    // Static initializer for allowed_abs_path
       static {
  -        // FIXME: you can verify with validate method.
  -        allowedUnwise.or(unreserved);
  -        // allowedUnwise.or(unwise);
  +        allowed_abs_path.or(abs_path);
  +        // allowed_abs_path.set('/');  // already included
  +        allowed_abs_path.clear('%');
  +    }
  +
  +
  +    /**
  +     * Those characters that are allowed within the rel_segment.
  +     */
  +    public static final BitSet allowed_rel_segment = new BitSet(256);
  +    // Static initializer for allowed_rel_segment
  +    static {
  +        allowed_rel_segment.or(rel_segment);
  +        allowed_rel_segment.clear('%');
  +    }
  +
  +
  +    /**
  +     * Those characters that are allowed within the query component.
  +     */
  +    public static final BitSet allowed_query = new BitSet(256);
  +    // Static initializer for allowed_query
  +    static {
  +        allowed_query.or(uric);
  +        allowed_query.clear('%');
  +    }
  +
  +
  +    /**
  +     * Those characters that are allowed within the fragment component.
  +     */
  +    public static final BitSet allowed_fragment = new BitSet(256);
  +    // Static initializer for allowed_fragment
  +    static {
  +        allowed_fragment.or(uric);
  +        allowed_fragment.clear('%');
       }
   
       // ------------------------------------------- Flags for this URI-reference
   
       // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  -    protected boolean _is_absoluteURI;
  -    protected boolean _is_relativeURI;
       // absoluteURI   = scheme ":" ( hier_part | opaque_part )
       protected boolean _is_hier_part;
       protected boolean _is_opaque_part;
  @@ -959,21 +1236,19 @@
       protected boolean _is_abs_path;
       protected boolean _is_rel_path;
       // net_path      = "//" authority [ abs_path ] 
  -    protected boolean _has_authority;
       // authority     = server | reg_name
       protected boolean _is_reg_name;
       protected boolean _is_server;  // = _has_server
       // server        = [ [ userinfo "@" ] hostport ]
  -    protected boolean _has_userinfo;
  -    // hostport      = host [ ":" port ] 
  -    protected boolean _has_hostport;
       // host          = hostname | IPv4address | IPv6reference
       protected boolean _is_hostname;
       protected boolean _is_IPv4address;
       protected boolean _is_IPv6reference;
  +    // query
  +    // fragment
  +    protected boolean _is_only_fragment;
   
  -    // ------------------------------------------------------ Protected methods
  -
  +    // ------------------------------------------ Character and escape encoding
   
       /**
        * This is a two mapping, one from original characters to octets, and
  @@ -989,17 +1264,18 @@
        * @param original the original character sequence
        * @param allowed those characters that are allowed within a component
        * @return URI character sequence
  -     * @exception UnsupportedEncodingException 
  +     * @exception Exception 
  +     * if NullPointerException, null argument
  +     * if UnsupportedEncodingException, unsupported character encoding
        * @see escape
        */
  -    protected char[] encode(String original, BitSet allowed)
  -            throws UnsupportedEncodingException {
  -
  -        if (original == null) return null;
  +    protected char[] encode(String original, BitSet allowed) throws Exception {
  +        // encode original to uri characters.
  +        if (original == null) {
  +            throw new NullPointerException("original");
  +        }
           byte[] octet = original.getBytes(_documentCharset);
  -        // TODO: decode octet to uri characters.
  -        // new String(octet, _protocolCharset);
  -        return null;
  +        return escape(octet, allowed);
       }
   
   
  @@ -1016,13 +1292,15 @@
        *
        * @param octet the octet sequence
        * @return original character sequence
  -     * @exception UnsupportedEncodingException 
  +     * @exception Exception
  +     * if NullPointerException, null argument
  +     * if UnsupportedEncodingException, unsupported character encoding
  +     * if IllegalArgumentException, incomplete trailing escape pattern
        * @see unescape
        */
  -    protected String decode(char[] uri)
  -        throws UnsupportedEncodingException {
  -        // TODO: decode octet to uri characters.
  -        return null;
  +    protected String decode(char[] uri) throws Exception {
  +        // decode uri to original characters.
  +        return new String(unescape(uri), _documentCharset);
       }
   
   
  @@ -1034,16 +1312,42 @@
        *
        * An escaped octet is encoded as a character triplet, consisting of the
        * percent character "%" followed by the two hexadecimal digits
  -     * representing the octet code. For example, "%20" is the escaped
  +     * representing the octet code. For example, "%20" is the escaped
        * encoding for the US-ASCII space character.
        *
        * @param octet the octet sequence to be escaped
        * @param allowed those characters that are allowed within a component
        * @return URI character sequence
  -     */
  -    protected char[] escape(byte[] octet, BitSet allowed) {
  -        // TODO: escape octet to uri characters.
  -        return null;
  +     * @exception Exception 
  +     * if NullPointerException, null argument
  +     * if UnsupportedEncodingException, unsupported character encoding
  +     */
  +    protected char[] escape(byte[] octet, BitSet allowed) throws Exception {
  +        // escape octet to uri characters.
  +        if (octet == null) {
  +            throw new NullPointerException("octet");
  +        }
  +        if (allowed == null) {
  +            throw new NullPointerException("allowed characters");
  +        }
  +        String octets = new String(octet, _protocolCharset);
  +        char[] preuric = new char[octets.length()];
  +        octets.getChars(0, octets.length(), preuric, 0);
  +        StringBuffer buf = new StringBuffer(preuric.length);
  +        for (int i = 0; i < preuric.length; i++) {
  +            char c = (char) preuric[i];
  +            if (allowed.get(c)) {
  +                buf.append(c);
  +            } else {
  +                byte b = (byte) preuric[i];
  +                buf.append('%');
  +                char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
  +                buf.append(hexadecimal);
  +                hexadecimal = Character.forDigit(b & 0xF, 16);
  +                buf.append(hexadecimal);
  +            }
  +        }
  +        return buf.toString().toCharArray();
       }
   
   
  @@ -1053,30 +1357,103 @@
        *   URI character sequence->octet sequence
        * </pre></blockquote><p>
        *
  -     * the percent "%" character always has the reserved purpose of being
  +     * The percent "%" character always has the reserved purpose of being
        * the escape indicator, it must be escaped as "%25" in order to be used
        * as data within a URI.
        *
        * @param uri the URI character sequence
        * @return octet sequence
  +     * @exception Exception
  +     * if NullPointerException, null argument
  +     * if UnsupportedEncodingException, unsupported character encoding
  +     * if IllegalArgumentException, incomplete trailing escape pattern
  +     */
  +    protected byte[] unescape(char[] uri) throws Exception {
  +        // unescape uri characters to octets
  +        if (uri == null) {
  +            throw new NullPointerException("uri");
  +        }
  +        byte[] octet = new String(uri).getBytes(_protocolCharset);
  +        int oi = 0; // output index
  +        for (int ii = 0; ii < uri.length; ) {
  +            byte b = (byte) octet[ii++];
  +            if (b == '%') {
  +                b = (byte) Character.digit(
  +                        (char) (octet[ii++] << 4 + octet[ii++]), 16);
  +                if (b == -1) {
  +                    throw new IllegalArgumentException(
  +                            "incomplete trailing escape pattern");
  +                }
  +            }
  +            octet[oi++] = (byte) b;
  +        }
  +        octet[oi] = (byte) '\0';
  +        return octet;
  +    }
  +
  +
  +    /**
  +     * Pre-validate the unescaped URI string within a specific component.
  +     *
  +     * @param component the component string within the component
  +     * @param disallowed those characters disallowed within the component
  +     * @return if true, it doesn't have the disallowed characters
  +     * if false, the component is undefined or an incorrect one
  +     */
  +    protected boolean prevalidate(String component, BitSet disallowed) {
  +        // prevalidate the given component by disallowed characters
  +        if (component == null) {
  +            return false; // undefined
  +        }
  +        char[] target = component.toCharArray();
  +        for (int i = 0; i < target.length; i++) {
  +            if (disallowed.get(target[i]))
  +                return false;
  +        }
  +        return true;
  +    }
  +
  +
  +    /**
  +     * Validate the URI characters within a specific component.
  +     * The component must be performed after escape encoding. Or it doesn't
  +     * include escaped characters.
  +     *
  +     * @param component the characters sequence within the component
  +     * @param generous those characters that are allowed within a component
  +     * @return if true, it's the correct URI character sequence
        */
  -    protected byte[] unescape(char[] uri) {
  -        // TODO: unescape uri characters to octets
  -        return null;
  +    protected boolean validate(char[] component, BitSet generous) {
  +        // validate each component by generous characters
  +        return validate(component, 0, -1, generous);
       }
   
   
       /**
  -     * Validate the URI characters within the specific component.
  +     * Validate the URI characters within a specific component.
  +     * The component must be performed after escape encoding. Or it doesn't
  +     * include escaped characters.
  +     *
        * It's not that much strict, generous.  The strict validation might be 
        * performed before being called this method.
        *
        * @param component the characters sequence within the component
  +     * @param soffset the starting offset of the given component
  +     * @param eoffset the ending offset of the given component
  +     * if -1, it means the length of the component
        * @param generous those characters that are allowed within a component
  -     * @return if true, it's the URI character sequence
  +     * @return if true, it's the correct URI character sequence
        */
  -    protected boolean validate(char[] component, BitSet generous) {
  -        // TODO: validate each component with generous characters
  +    protected boolean validate(char[] component, int soffset, int eoffset,
  +            BitSet generous) {
  +        // validate each component by generous characters
  +        if (eoffset == -1) {
  +            eoffset = component.length;
  +        }
  +        for (int i = soffset; i < eoffset; i++) {
  +            if (!generous.get(component[i]))
  +                return false;
  +        }
           return true;
       }
   
  @@ -1112,6 +1489,11 @@
        * @exception Exception
        */
       protected void parseUriReference(String original) throws Exception {
  +        // validate and construct the URI character sequence
  +        
  +        if (original == null) {
  +            throw new IllegalArgumentException("URI-Reference required");
  +        }
   
           /** @
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  @@ -1127,23 +1509,12 @@
           }
   
           /**
  -         * The index to start the search from.
  -         */
  -        int from = 0;
  -
  -        /**
            * <p><blockquote><pre>
            *     @@@@@@@@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        int at = tmp.indexOf(":/?#", from);
  -
  -        /**
  -         * The next index to start the search to.
  -         * If it's not -1, it's the index to be stopped.
  -         */
  -        int next = -1;
  +        int at = tmp.indexOf(":/?#");
   
           /**
            * <p><blockquote><pre>
  @@ -1152,13 +1523,12 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        _is_absoluteURI = false;
           if (at > 0 && tmp.charAt(at) == ':') {
  -            _scheme = tmp.substring(0, at).trim().toLowerCase().toCharArray();
  -            // Set flag
  -            _is_absoluteURI = true;
  +            char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
  +            if (validate(target, scheme)) {
  +                _scheme = target;
  +            }
           }
  -        _is_relativeURI = !_is_absoluteURI;
   
           /**
            * The length of the sequence of characters.
  @@ -1173,31 +1543,55 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        _is_net_path = false;
  -        _is_abs_path = false;
  -        _is_rel_path = false;
  -        _is_hier_part = false;
  -        if (tmp.charAt(at) == '/') {
  +        // Reset flags
  +        _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
  +        if (at >= 0 && tmp.charAt(at) == '/') {
               // Set flag
               _is_hier_part = true;
  -            if (at+1 < length && tmp.charAt(at+1) == '/') {
  -                from = at + 2;
  -                next = tmp.indexOf("/?#", from);  // at, if not -1
  -                parseAuthority((next > 0) ? tmp.substring(from, next) :
  -                    tmp.substring(from));
  +            if (at+2 < length && tmp.charAt(at+1) == '/') {
  +                // the temporary index to start the search from
  +                int from = at + 2;
  +                int next = tmp.indexOf("/?#", from);  // at, if not -1
  +                if (next == -1) {
  +                    next = tmp.length();
  +                }
  +                parseAuthority(tmp.substring(from, next));
  +                at = next;
                   // Set flag
                   _is_net_path = true;
  -            } else {
  +            }
  +            if (tmp.charAt(at) == '/') {
                   // Set flag
                   _is_abs_path = true;
               }
  -        } else if (_is_relativeURI) {
  -            // Set flag
  -            _is_rel_path = true;
           } else {
  -            // REMINDME: never here or throw an Exception
  +            if (_scheme == null) { // is_relativeURI
  +                // rel_path = rel_segment [ abs_path ]
  +                int next = tmp.indexOf('/');
  +                if (next == -1) {
  +                    next = tmp.length();
  +                }
  +                // validating before escape encoding
  +                if (prevalidate(tmp.substring(at, next),
  +                            disallowed_rel_segment)) {
  +                    // Set flag
  +                    _is_rel_path = true;
  +                }
  +                // REMINDME: let us skip the rest of abs_path to validate
  +            } else { // is_absoluteURI
  +                // validating before escape encoding
  +                if (prevalidate(tmp.substring(at), disallowed_opaque_part)) {
  +                    // Set flag
  +                    _is_opaque_part = true;
  +                }
  +            }
  +            if (!_is_rel_path || !_is_opaque_part) {
  +                // correct validation.  possibly, only fragment.
  +                // is_relativeURI and is_absoluteURI must be false
  +                // Set flag
  +                _is_only_fragment = true;
  +            }
           }
  -        _is_opaque_part = !_is_hier_part;
   
           /**
            * <p><blockquote><pre>
  @@ -1207,10 +1601,13 @@
            * </pre></blockquote><p>
            */
           if (tmp.charAt(at) != '?' && tmp.charAt(at) != '#') {
  -            from = at;
  -            next = tmp.indexOf("?#", from);
  -            _path = encode(tmp.substring(from, next), allowedPath);
  -            if (next > 0) at = next;
  +            int from = at;
  +            int next = tmp.indexOf("?#", from);
  +            if (next == -1) {
  +                next = tmp.length();
  +            }
  +            setPath(tmp.substring(from, next));
  +            at = next;
           }
   
           /**
  @@ -1220,11 +1617,13 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        if (tmp.charAt(at) == '?') {
  -            from = at + 1;
  -            next = tmp.indexOf('#', from);
  -            _query = encode(tmp.substring(from, next), allowedQuery);
  -            if (next > 0) at = next;
  +        if (at+1 < length && tmp.charAt(at) == '?') {
  +            int from = at + 1;
  +            int next = tmp.indexOf('#', from);
  +            if (next != -1) {
  +                _query = encode(tmp.substring(from, next), allowed_query);
  +                at = next;
  +            }
           }
   
           /**
  @@ -1234,30 +1633,135 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        if (tmp.charAt(at) == '#') {
  -            from = at + 1;
  -            _fragment = encode(tmp.substring(from), allowedFragment);
  +        if (at+1 < length && tmp.charAt(at) == '#') {
  +            int from = at + 1;
  +            _fragment = encode(tmp.substring(from), allowed_fragment);
           }
  +
  +        // set this URI.
  +        setUriReference();
       }
   
   
       /**
  -     * Parse the authority part.
  +     * Parse the authority component.
        *
  -     * @param original the original character sequence
  -     * @return the original character sequence
  +     * @param original the original character sequence of authority component
        * @exception Exception
  +     * if NumberFormatException, port isn't integer
  +     * if IllegalArgumentException, incorrect IPv6reference or wrong host
        */
       protected void parseAuthority(String original) throws Exception {
  -        // TODO: validate the authroity part and
  -        // confirm the component within the authority part.
  -        //_authority = encode(original, allowedAuthority);
  -        // TODO: Set reg_name, server, userinfo, hostport
  -        // TODO: _is_reg_name, _is_server, _has_userinfo, _is_hostport
  -        // _is_hostname, _is_IPv4address, _is_IPv6reference
  +        // Reset flags
  +        _is_reg_name = _is_server =
  +        _is_hostname = _is_IPv4address = _is_IPv6reference = false;
  +
  +        int from = 0;
  +        int next = original.indexOf('@');
  +        if (next != -1) {  // neither -1 and 0
  +            // if next == 0, for example, in ftp, userinfo = 'anonymous'
  +            // each protocol extended from URI supports the specific userinfo
  +            _userinfo = encode(original.substring(0, next), allowed_userinfo);
  +            from = next + 1;
  +        }
  +        next = original.indexOf('[', from);
  +        if (next >= from) {
  +            next = original.indexOf(']', from);
  +            if (next == -1) {
  +                throw new IllegalArgumentException("IPv6reference");
  +            } else {
  +                next++;
  +            }
  +            // In IPv6reference, '[', ']' should be excluded
  +            _host = encode(original.substring(from, next),
  +                    allowed_IPv6reference);
  +            // Set flag
  +            _is_IPv6reference = true;
  +        } else { // only for !_is_IPv6reference
  +            next = original.indexOf(':', from);
  +            if (next == -1) {
  +                next = original.length();
  +            }
  +            if (validate(_host, IPv4address)) {
  +                _host = original.substring(from, next).toCharArray();
  +                // Set flag
  +                _is_IPv4address = true;
  +            } else if (validate(_host, hostname)) {
  +                _host = original.substring(from, next).toCharArray();
  +                // Set flag
  +                _is_hostname = true;
  +            } else {
  +                // Set flag
  +                _is_reg_name = true;
  +            }
  +        }
  +        if (_is_reg_name) {
  +            // Reset flags for a server-based naming authority
  +            _is_server = _is_hostname = _is_IPv4address =
  +            _is_IPv6reference = false;
  +            // set a registry-based naming authority
  +            _authority = encode(original.toString(), allowed_reg_name);
  +        } else {
  +            if (original.charAt(next) == ':') {
  +                from = next + 1;
  +                _port = Integer.parseInt(original.substring(from));
  +            }
  +            // set a server-based naming authority
  +            StringBuffer buf = new StringBuffer();
  +            if (_userinfo != null) { // has_userinfo
  +                buf.append(_userinfo);
  +                buf.append('@');
  +            }
  +            if (_host != null) {
  +                buf.append(_host);
  +                if (_port != -1) {
  +                    buf.append(':');
  +                    buf.append(_port);
  +                }
  +            }
  +            _authority = buf.toString().toCharArray();
  +            // Set flag
  +            _is_server = true;
  +        }
       }
   
   
  +    /**
  +     * Once it's parsed successfully, set this URI.
  +     *
  +     * @see getRawURI
  +     */
  +    protected void setUriReference() throws Exception {
  +        // set _uri
  +        StringBuffer buf = new StringBuffer();
  +        // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  +        if (_scheme != null) {
  +            buf.append(_scheme);
  +            buf.append(':');
  +        }
  +        if (_is_net_path) {
  +            buf.append("//");
  +            if (_authority != null) { // has_authority
  +                buf.append(_authority);
  +            }
  +        }
  +        if (_opaque != null && _is_opaque_part) {
  +            buf.append(_opaque);
  +        } else if (_path != null) { // _is_hier_part or _is_relativeURI
  +            buf.append(_path);
  +            if (_query != null) { // has_query
  +                buf.append('?');
  +                buf.append(_query);
  +            }
  +        }
  +        if (_fragment != null) { // has_fragment
  +            buf.append('#');
  +            buf.append(_fragment);
  +        }
  +
  +        _uri = buf.toString().toCharArray();
  +    }
  +
       // --------------------------------------------------------- Public methods
     
   
  @@ -1267,7 +1771,7 @@
        * @return true iff this URI is absoluteURI
        */
       public boolean is_absoluteURI() {
  -        return _is_absoluteURI;
  +        return _is_only_fragment ? false : (_scheme != null);
       }
     
   
  @@ -1277,7 +1781,7 @@
        * @return true iff this URI is relativeURI
        */
       public boolean is_relativeURI() {
  -        return _is_relativeURI;
  +        return _is_only_fragment ? false : (_scheme == null);
       }
   
   
  @@ -1303,11 +1807,13 @@
   
       /**
        * Tell whether or not this URI is net_path.
  +     * It's the same function as the has_authority() method.
        *
        * @return true iff this URI is net_path
  +     * @see has_authority
        */
       public boolean is_net_path() {
  -        return _is_net_path;
  +        return _is_net_path || (_authority != null);
       }
   
   
  @@ -1333,11 +1839,13 @@
   
       /**
        * Tell whether or not this URI has authority.
  +     * It's the same function as the is_net_path() method.
        *
        * @return true iff this URI has authority
  +     * @see is_net_path
        */
       public boolean has_authority() {
  -        return _has_authority;
  +        return (_authority != null) || _is_net_path;
       }
   
       /**
  @@ -1366,21 +1874,11 @@
        * @return true iff this URI has userinfo
        */
       public boolean has_userinfo() {
  -        return _has_userinfo;
  +        return (_userinfo != null);
       }
     
   
       /**
  -     * Tell whether or not this URI has hostport.
  -     *
  -     * @return true iif this URI has hostport
  -     */
  -    public boolean has_hostport() {
  -        return _has_hostport;
  -    }
  -
  -
  -    /**
        * Tell whether or not this URI is hostname.
        *
        * @return true iff this URI is hostname
  @@ -1408,6 +1906,27 @@
       public boolean is_IPv6reference() {
           return _is_IPv6reference;
       }
  +
  +
  +    /**
  +     * Tell whether or not this URI has query.
  +     *
  +     * @return true iff this URI has query
  +     */
  +    public boolean has_query() {
  +        return (_query != null);
  +    }
  +   
  +
  +    /**
  +     * Tell whether or not this URI has fragment.
  +     *
  +     * @return true iff this URI has fragment
  +     */
  +    public boolean has_fragment() {
  +        return (_fragment != null);
  +    }
  +   
      
       // ---------------------------------------------------------------- Charset
   
  @@ -1431,7 +1950,7 @@
        * An individual URI scheme may require a single charset, define a default
        * charset, or provide a way to indicate the charset used.
        *
  -     * @return charset
  +     * @return the charset string
        */
       public String getProtocolCharset() {
           return _protocolCharset;
  @@ -1451,7 +1970,7 @@
       /**
        * Get the default charset of the document.
        *
  -     * @return charset
  +     * @return the charset string
        */
       public String getDocumentCharset() {
           return _documentCharset;
  @@ -1463,24 +1982,57 @@
        * Get the scheme.
        *
        * @return the scheme
  -     * @exception Exception
  -     * UnsupportedEncodingException
        */
  -    public String getScheme() throws Exception {
  -        return decode(_scheme);
  +    public char[] getRawScheme() {
  +        return _scheme;
  +    }
  +
  +
  +    /**
  +     * Get the scheme.
  +     *
  +     * @return the scheme
  +     */
  +    public String getScheme() {
  +        return new String(_scheme);
       }
   
       // ---------------------------------------------------------- The authority
   
       /**
  -     * Set the authority.
  +     * Set the authority.  It can be one type of server, hostport, hostname,
  +     * IPv4address, IPv6reference and reg_name.
  +     * <p><blockquote><pre>
  +     *   authority     = server | reg_name
  +     * </pre></blockquote><p>
        *
        * @param authority the authority
        * @exception Exception
        * UnsupportedEncodingException
        */
       public void setAuthority(String authority) throws Exception {
  -        _authority = encode(authority, allowedAuthority);
  +        parseAuthority(authority);
  +        setUriReference();
  +    }
  +
  +
  +    /**
  +     * Get the raw-escaped authority.
  +     *
  +     * @return the raw-escaped authority
  +     */
  +    public char[] getRawAuthority() {
  +        return _authority;
  +    }
  +
  +
  +    /**
  +     * Get the escaped authority.
  +     *
  +     * @return the escaped authority
  +     */
  +    public String getEscapedAuthority() {
  +        return new String(_authority);
       }
   
   
  @@ -1489,55 +2041,89 @@
        *
        * @return the authority
        * @exception Exception
  -     * UnsupportedEncodingException
  +     * @see decode
        */
       public String getAuthority() throws Exception {
           return decode(_authority);
       }
   
  -    // --------------------------------------------------------------- The host
  +    // ----------------------------------------------------------- The userinfo
  +
  +    /**
  +     * Get the raw-escaped userinfo.
  +     *
  +     * @return the raw-escaped userinfo
  +     * @see getAuthority
  +     */
  +    public char[] getRawUserinfo() {
  +        return _userinfo;
  +    }
  +
  +
  +    /**
  +     * Get the escaped userinfo.
  +     *
  +     * @return the escaped userinfo
  +     * @see getAuthority
  +     */
  +    public String getEscapedUserinfo() {
  +        return new String(_userinfo);
  +    }
  +
   
       /**
  -     * Set the host.
  +     * Get the userinfo.
        *
  -     * @param the host
  +     * @return the userinfo
        * @exception Exception
  -     * UnsupportedEncodingException
  +     * @see decode
  +     * @see getAuthority
        */
  -    public void setHost(String host) throws Exception {
  -        // Support the non-ASCII host configuration
  -        _host = encode(host, this.host); // Notice that there isn't allowedHost
  +    public String getUserinfo() throws Exception {
  +        return decode(_userinfo);
       }
   
  +    // --------------------------------------------------------------- The host
   
       /**
        * Get the host.
  +     * <p><blockquote><pre>
  +     *   host          = hostname | IPv4address | IPv6reference
  +     * </pre></blockquote><p>
        *
        * @return the host
  -     * @exception Exception
  -     * UnsupportedEncodingException
  +     * @see getAuthority
        */
  -    public String getHost() throws Exception {
  -        // Support the non-ASCII host configuration
  -        return decode(_host);
  +    public char[] getRawHost() {
  +        return _host;
       }
   
  -    // --------------------------------------------------------------- The port
   
       /**
  -     * Set the port.
  +     * Get the host.
  +     * <p><blockquote><pre>
  +     *   host          = hostname | IPv4address | IPv6reference
  +     * </pre></blockquote><p>
        *
  -     * @param the port
  +     * @return the host
  +     * @exception Exception
  +     * @see decode
  +     * @see getAuthority
        */
  -    public void setPort(int port) {
  -        _port = port;
  +    public String getHost() throws Exception {
  +        return decode(_host);
       }
   
  +    // --------------------------------------------------------------- The port
   
       /**
  -     * Get the port.
  +     * Get the port.  In order to get the specific default port, the specific
  +     * protocol-supported class extended from the URI class should be used.
  +     * It has the server-based naming authority.
        *
        * @return the port
  +     * if -1, it has the default port for the scheme or the server-based
  +     * naming authority is not supported in the specific URI.
        */
       public int getPort() {
           return _port;
  @@ -1546,39 +2132,249 @@
       // --------------------------------------------------------------- The path
   
       /**
  -     * Set the path.
  +     * Set the path.  This method is not meant to be used by API programmers.
        *
  -     * @param the path string
  +     * @param path the path string
        * @exception Exception
  -     * UnsupportedEncodingException
  +     * if IllegalArgumentException, set incorrectly or fragment only
  +     * @see encode
        */
  -    public void setPath(String path) throws Exception {
  -        _path = encode(path, allowedPath);
  +    protected void setPath(String path) throws Exception {
  +        // set path
  +        if (_is_net_path || _is_abs_path) {
  +            _path = encode(path, allowed_abs_path);
  +        } else if (_is_rel_path) {
  +            StringBuffer buff = new StringBuffer(path.length());
  +            int at = path.indexOf('/');
  +            if (at > 0) {  // never 0
  +                buff.append(encode(path.substring(0, at), allowed_rel_segment));
  +                buff.append(encode(path.substring(at), allowed_abs_path));
  +            } else {
  +                buff.append(encode(path, allowed_rel_segment));
  +            }
  +            _path = buff.toString().toCharArray();
  +        } else if (_is_opaque_part) {
  +            _opaque = encode(path, allowed_opaque_part);
  +        } else {
  +            throw new IllegalArgumentException("incorrect path");
  +        }
       }
   
   
       /**
  -     * Get the path.
  +     * Resolve the base and relative path.
        *
  -     * @return the path string
  +     * @param base_path a character array of the base_path
  +     * @param rel_path a character array of the rel_path
  +     * @return the resolved path
  +     */
  +    protected char[] resolvePath(char[] base_path, char[] rel_path) {
  +        // REMINDME: paths are never null
  +        String base = new String(base_path);
  +        int at = base.lastIndexOf('/');
  +        if (at != -1) {
  +            base_path = base.substring(0, at + 1).toCharArray();
  +        }
  +        // _path could be empty
  +        if (rel_path.length == 0) {
  +            return normalize(base_path);
  +        } else if (rel_path[0] == '/') {
  +            return rel_path;
  +        } else {
  +            StringBuffer buff = new StringBuffer(base.length() +
  +                rel_path.length);
  +            if (at != -1) {
  +                buff.append(base.substring(0, at + 1));
  +                buff.append(rel_path);
  +            }
  +            return normalize(buff.toString().toCharArray());
  +        }
  +    }
  +
  +
  +    /**
  +     * Get the raw-escaped current hierarchy level in the given path.
  +     *
  +     * @param path the path
  +     * @return the current hierarchy level
        * @exception Exception
  -     * UnsupportedEncodingException
  +     * if IllegalArgumentException, no hierarchy level
  +     * if NullPointerException, null argument
        */
  -    public String getPath() throws Exception {
  -        return decode(_path);
  +    protected char[] getRawCurrentHierPath(char[] path) throws Exception {
  +        if (_is_opaque_part) {
  +            throw new IllegalArgumentException("no hierarchy level");
  +        }
  +        if (path == null) {
  +            throw new NullPointerException("null argument");
  +        }
  +        String buff = new String(path);
  +        int first = buff.indexOf('/');
  +        int last = buff.lastIndexOf('/');
  +        if (first != last && last != -1) {
  +            return buff.substring(0, last).toCharArray();
  +        }
  +        // FIXME: it could be a document on the server side
  +        return path;
  +    }
  +
  +
  +    /**
  +     * Get the raw-escaped current hierarchy level.
  +     *
  +     * @return the raw-escaped current hierarchy level
  +     * @exception Exception
  +     */
  +    public char[] getRawCurrentHierPath() throws Exception {
  +        return getRawCurrentHierPath(_path);
  +    }
  + 
  +
  +    /**
  +     * Get the escaped current hierarchy level.
  +     *
  +     * @return the escaped current hierarchy level
  +     * @exception Exception
  +     */
  +    public String getEscapedCurrentHierPath() throws Exception {
  +        return new String(getRawCurrentHierPath());
  +    }
  + 
  +
  +    /**
  +     * Get the current hierarchy level.
  +     *
  +     * @return the current hierarchy level
  +     * @exception Exception
  +     * @see decode
  +     */
  +    public String getCurrentHierPath() throws Exception {
  +        return decode(getRawCurrentHierPath());
  +    }
  +
  +
  +    /**
  +     * Get the level above this hierarchy level.
  +     *
  +     * @return the raw above hierarchy level
  +     * @exception Exception
  +     */
  +    public char[] getRawAboveHierPath() throws Exception {
  +        return getRawCurrentHierPath(getRawCurrentHierPath());
  +    }
  +
  +
  +    /**
  +     * Get the level above this hierarchy level.
  +     *
  +     * @return the raw above hierarchy level
  +     * @exception Exception
  +     */
  +    public String getEscapedAboveHierPath() throws Exception {
  +        return new String(getRawAboveHierPath());
  +    }
  +
  +
  +    /**
  +     * Get the level above this hierarchy level.
  +     *
  +     * @return the above hierarchy level
  +     * @exception Exception
  +     * @see decode
  +     */
  +    public String getAboveHierPath() throws Exception {
  +        return decode(getRawAboveHierPath());
  +    }
  +
  +
  +    /**
  +     * Get the raw-escaped path.
  +     * <p><blockquote><pre>
  +     *   path          = [ abs_path | opaque_part ]
  +     * </pre></blockquote><p>
  +     * @return the raw-escaped path
  +     */
  +    public char[] getRawPath() {
  +        return _is_opaque_part ? _opaque : _path;
       }
   
   
       /**
        * Get the escaped path.
  +     * <p><blockquote><pre>
  +     *   path          = [ abs_path | opaque_part ]
  +     *   abs_path      = "/"  path_segments 
  +     *   opaque_part   = uric_no_slash *uric
  +     * </pre></blockquote><p>
        *
        * @return the escaped path string
        */
       public String getEscapedPath() {
  -        return new String(_path);
  +        return new String(_is_opaque_part ? _opaque :_path);
       }
   
   
  +    /**
  +     * Get the path.
  +     * <p><blockquote><pre>
  +     *   path          = [ abs_path | opaque_part ]
  +     * </pre></blockquote><p>
  +     * @return the path string
  +     * @exception Exception
  +     * @see decode
  +     */
  +    public String getPath() throws Exception {
  +        return decode(getRawPath());
  +    }
  +
  +    // ----------------------------------------------------- The path and query 
  +
  +    /**
  +     * Get the raw-escaped path and query.
  +     *
  +     * @return the raw-escaped path and query
  +     * @exception NullPointerException path undefined
  +     */
  +    public char[] getRawPathQuery() {
  +        if (_path == null) {
  +            throw new NullPointerException("path undefined");
  +        }
  +        int len = _path.length;
  +        if (_query != null) {
  +            len += 1 + _query.length;
  +        }
  +        StringBuffer buff = new StringBuffer(len);
  +        buff.append(_path);
  +        if (_query != null) {
  +            buff.append('?');
  +            buff.append(_query);
  +        }
  +        return buff.toString().toCharArray();
  +    }
  +
  +
  +    /**
  +     * Get the escaped path and query.
  +     *
  +     * @return the escaped path and query string
  +     * @exception NullPointerException path undefined
  +     */
  +    public String getEscapedPathQuery() {
  +        return new String(getRawPathQuery());
  +    }
  +
  +
  +    /**
  +     * Get the path and query.
  +     *
  +     * @return the path and query string.
  +     * @exception Exception
  +     * @see decode
  +     */
  +    public String getPathQuery() throws Exception {
  +        return decode(getRawPathQuery());
  +    }
  +
       // -------------------------------------------------------------- The query 
   
       /**
  @@ -1586,100 +2382,341 @@
        *
        * @param query the query string.
        * @exception Exception
  -     * UnsupportedEncodingException
  +     * @see encode
        */
       public void setQuery(String query) throws Exception {
  -        _query = encode(query, allowedQuery);
  +        _query = encode(query, allowed_query);
  +        setUriReference();
       }
   
   
       /**
  -     * Get the query.
  +     * Get the raw-escaped query.
        *
  -     * @return the query string.
  -     * @exception Exception
  -     * UnsupportedEncodingException
  +     * @return the raw-escaped query
        */
  -    public String getQuery() throws Exception {
  -        return decode(_query);
  +    public char[] getRawQuery() {
  +        return _query;
       }
   
   
       /**
        * Get the escaped query.
        *
  -     * @return the escaped query string.
  +     * @return the escaped query string
        */
       public String getEscapedQuery() {
           return new String(_query);
       }
   
  +
  +    /**
  +     * Get the query.
  +     *
  +     * @return the query string.
  +     * @exception Exception
  +     * @see decode
  +     */
  +    public String getQuery() throws Exception {
  +        return decode(_query);
  +    }
  +
       // ----------------------------------------------------------- The fragment 
   
       /**
        * Set the fragment.
        *
  +     * An empty URI reference represents the base URI of the current document
  +     * and should be replaced by that URI when transformed into a request.
  +     *
        * @param fragment the fragment string.
        * @exception Exception
        * UnsupportedEncodingException
        */
       public void setFragment(String fragment) throws Exception {
  -        _fragment = encode(fragment, allowedFragment);
  +        _fragment = encode(fragment, allowed_fragment);
  +        setUriReference();
       }
   
   
       /**
  -     * Get the fragment.
  +     * Get the raw-escaped fragment.
        *
  -     * @return the fragment string.
  -     * @exception Exception
  -     * UnsupportedEncodingException
  +     * The optional fragment identifier is not part of a URI, but is often used
  +     * in conjunction with a URI.
  +     *
  +     * The format and interpretation of fragment identifiers is dependent on
  +     * the media type [RFC2046] of the retrieval result.
  +     *
  +     * A fragment identifier is only meaningful when a URI reference is
  +     * intended for retrieval and the result of that retrieval is a document
  +     * for which the identified fragment is consistently defined.
  +     *
  +     * @return the raw-escaped fragment
        */
  -    public String getFragment() throws Exception {
  -        return decode(_fragment);
  +    public char[] getRawFragment() {
  +        return _fragment;
       }
   
  +
       /**
        * Get the escaped fragment.
        *
  -     * @return the escaped fragment string.
  +     * @return the escaped fragment string
        */
       public String getEscapedFragment() {
           return new String(_fragment);
       }
   
  +
  +    /**
  +     * Get the fragment.
  +     *
  +     * @return the fragment string
  +     * @exception Exception
  +     * @see decode
  +     */
  +    public String getFragment() throws Exception {
  +        return decode(_fragment);
  +    }
  +
       // ------------------------------------------------------------- Utilities 
   
       /**
  -     * Normalize this URI.
  +     * Normalize the given hier path part.
  +     *
  +     * @param path the path to normalize
  +     * @return the normalized path
  +     */
  +    protected char[] normalize(char[] path) {
  +        if (path == null) {
  +            return null;
  +        }
  +        String normalized = new String(path);
  +        // precondition
  +        if (!normalized.endsWith("/")) {
  +            normalized += '/';
  +        }
  +        // Resolve occurrences of "/./" in the normalized path
  +        while (true) {
  +            int at = normalized.indexOf("/./");
  +            if (at == -1) {
  +                break;
  +            }
  +            normalized = normalized.substring(0, at) +
  +            normalized.substring(at + 2);
  +        }
  +        // Resolve occurrences of "/../" in the normalized path
  +        while (true) {
  +            int at = normalized.indexOf("/../");
  +            if (at == -1) {
  +                break;
  +            }
  +            if (at == 0) {
  +                normalized = "/";
  +                break;
  +            }
  +            int backward = normalized.lastIndexOf('/', at - 1);
  +            if (backward == -1) {
  +                // consider the rel_path
  +                normalized = normalized.substring(at + 4);
  +            } else {
  +                normalized = normalized.substring(0, backward) +
  +                normalized.substring(at + 3);
  +            }
  +        }
  +        // Resolve occurrences of "//" in the normalized path
  +        while (true) {
  +            int at = normalized.indexOf("//");
  +            if (at == -1) {
  +                break;
  +            }
  +            normalized = normalized.substring(0, at) +
  +            normalized.substring(at + 1);
  +        }
  +        // Return the normalized path that we have completed
  +        return normalized.toCharArray();
  +    }
  +
  +
  +    /**
  +     * Normalize the path part of this URI.
  +     */
  +    public void normalize() {
  +        _path = normalize(_path);
  +    }
  +
  +
  +    /**
  +     * Set debug mode
        *
  -     * @return the normalized URI
  +     * @param level the level of debug mode
        */
  -    public URI normalize() {
  -        // TODO: normalize and return URI
  -        return null;
  +    public void setDebug(int level) {
  +        debug = level;
       }
   
  +
       /**
  -     * Test a object if this is equal with another.
  +     * Test if the first array is equal to the second array.
  +     *
  +     * @param first the first character array
  +     * @param second the second character array
  +     * @return true if they're equal
  +     */
  +    protected boolean equals(char[] first, char[] second) {
  +        if (first == null && second == null) {
  +            return true;
  +        }
  +        if (first == null || second == null) {
  +            return false;
  +        }
  +        if (first.length != second.length) {
  +            return false;
  +        }
  +        for (int i = 0; i < first.length; i++) {
  +            if (first[i] != second[i]) {
  +                return false;
  +            }
  +        }
  +        return true;
  +    }
  +
  +
  +    /**
  +     * Test whether this URI is equal to another object.
        *
        * @param obj an object to compare
        * @return true if two URI objects are equal
        */
       public boolean equals(Object obj) {
  -        // TODO: normalize and test each components
  -        return false;
  +        // test each component in turn (compared as-is, without normalizing)
  +        if (obj == this) {
  +            return true;
  +        }
  +        if (!(obj instanceof URI)) {
  +            return false;
  +        }
  +        URI another = (URI) obj;
  +        // scheme
  +        if (!equals(_scheme, another._scheme)) {
  +            return false;
  +        }
  +        // is_opaque_part or is_hier_part?  and opaque
  +        if (!equals(_opaque, another._opaque)) {
  +            return false;
  +        }
  +        // is_hier_part
  +        // has_authority
  +        if (!equals(_authority, another._authority)) {
  +            return false;
  +        }
  +        // path
  +        if (!equals(_path, another._path)) {
  +            return false;
  +        }
  +        // has_query
  +        if (!equals(_query, another._query)) {
  +            return false;
  +        }
  +        // has_fragment?  should be careful of the only fragment case.
  +        if (!equals(_fragment, another._fragment)) {
  +            return false;
  +        }
  +        return true;
  +    }
  +
  +    // ---------------------------------------------------------- Serialization
  +
  +    /**
  +     * Write the content of this URI.
  +     *
  +     * @param oos the object-output stream
  +     */
  +    protected void writeObject(java.io.ObjectOutputStream oos)
  +    throws IOException {
  +        oos.defaultWriteObject();
       }
   
  +
  +    /**
  +     * Read a URI.
  +     *
  +     * @param ois the object-input stream
  +     */
  +    protected void readObject(java.io.ObjectInputStream ois)
  +    throws ClassNotFoundException, IOException {
  +        ois.defaultReadObject();
  +    }
  +
  +    // ------------------------------------------------------------- Comparison 
  +
  +    /**
  +     * Compare this URI to another object. 
  +     *
  +     * @param obj the object to be compared.
  +     * @return 0 if both URIs have the same string form
  +     * @exception ClassCastException not URI argument
  +     * @exception NullPointerException character encoding error or null object
  +     */
  +    public int compareTo(Object obj) {
  +        URI another = (URI) obj;
  +        return toString().compareTo(another.toString());
  +    }
  +
  +    // ------------------------------------------------------------ Get the URI
  +
  +    /**
  +     * Get the URI character sequence. It's raw-escaped.
  +     * This is useful when the URI is to be transported by a protocol.
  +     *
  +     * @return URI character sequence
  +     */
  +    public char[] getRawURI() {
  +        return _uri;
  +    }
  +
  +
  +    /**
  +     * Get the URI character sequence. It's escaped.
  +     * This is useful when the URI is to be transported by a protocol.
  +     *
  +     * @return the URI string
  +     */
  +    public String getEscapedURI() {
  +        return new String(_uri);
  +    }
       
  +
       /**
  -     * Get the escaped URI string.
  -     * For the purpose of the protocol to be transported, it's useful.
  +     * Get the decoded URI character sequence.
  +     *
  +     * @return the URI string
  +     * @exception Exception
  +     * @decode
  +     */
  +    public String getURI() throws Exception {
  +        return decode(_uri);
  +    }
  +
  +
  +    /**
  +     * Get the escaped URI string.  It doesn't throw any exception.
  +     * However, if there is an error, null is returned.
        *
        * @return the escaped URI string
  +     * (null if an error occurred)
        */
       public String toString() {
  -        return new String(_uri);
  +        String s = null;
  +        try {
  +            s = getURI();
  +        } catch (Throwable t) {
  +            if (debug > 0) {
  +                t.getMessage();
  +            }
  +        } 
  +        return s;
       }
   
   }
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>