You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by je...@apache.org on 2002/04/24 19:24:21 UTC
cvs commit: jakarta-slide/src/util/org/apache/util URI.java
jericho 02/04/24 10:24:21
Modified: src/util/org/apache/util URI.java
Log:
- Implemented handling of URI usage, including character and escape encoding.
- Note that it is still experimental. I will write test cases for URI when I get some free time.
Revision Changes Path
1.4 +1274 -237 jakarta-slide/src/util/org/apache/util/URI.java
Index: URI.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- URI.java 30 Mar 2002 10:00:49 -0000 1.3
+++ URI.java 24 Apr 2002 17:24:21 -0000 1.4
@@ -1,13 +1,13 @@
/*
- * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.3 2002/03/30 10:00:49 jericho Exp $
- * $Revision: 1.3 $
- * $Date: 2002/03/30 10:00:49 $
+ * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.4 2002/04/24 17:24:21 jericho Exp $
+ * $Revision: 1.4 $
+ * $Date: 2002/04/24 17:24:21 $
*
* ====================================================================
*
- * the Apache Software License, Version 1.1
+ * The Apache Software License, Version 1.1
*
- * Copyright (c) 1999 the Apache Software Foundation. All rights
+ * Copyright (c) 1999-2002 the Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -63,6 +63,7 @@
package org.apache.util;
+import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.security.AccessController;
@@ -109,7 +110,7 @@
* abs_path = "/" path_segments
* </pre></blockquote><p>
*
- * the following examples illustrate URI that are in common use.
+ * The following examples illustrate URI that are in common use.
* ftp://ftp.is.co.za/rfc/rfc1808.txt
* -- ftp scheme for File Transfer Protocol services
* gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
@@ -127,14 +128,16 @@
* relative URL(RFC 1808).
*
* @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
- * @version $Revision: 1.3 $ $Date: 2002/03/14 15:14:01
+ * @version $Revision: 1.4 $ $Date: 2002/03/14 15:14:01
*/
-public class URI implements java.io.Serializable {
+public class URI implements Comparable, java.io.Serializable {
// --------------------------------------------------------- Constructors
+ protected URI() {
+ }
/**
* Construct a URI from the given string.
@@ -142,12 +145,12 @@
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
* </pre></blockquote><p>
*
- * @param str the string to be represented to URI character sequence
+ * @param original the string to be represented to URI character sequence
* It is one of absoluteURI and relativeURI.
* @exception Exception
*/
- public URI(String str) throws Exception {
- parseUriReference(str);
+ public URI(String original) throws Exception {
+ parseUriReference(original);
}
@@ -158,15 +161,28 @@
* absoluteURI = scheme ":" ( hier_part | opaque_part )
* opaque_part = uric_no_slash *uric
* </pre></blockquote><p>
- * In general, absolute URI = <scheme>:<scheme-specific-part>#<fragment>
+ * It's for absolute URI = <scheme>:<scheme-specific-part>#<fragment>
*
* @param scheme the scheme string
* @param scheme_specific_part scheme_specific_part
* @param fragment the fragment string
*/
public URI(String scheme, String scheme_specific_part, String fragment)
- throws Exception {
- // TODO: validate and contruct the URI character sequence
+ throws Exception {
+ // validate and construct the URI character sequence
+ if (scheme == null) {
+ throw new IllegalArgumentException("scheme required");
+ }
+ char[] s = scheme.toLowerCase().toCharArray();
+ if (validate(s, this.scheme)) {
+ _scheme = s; // is_absoluteURI
+ } else {
+ throw new IllegalArgumentException("incorrect scheme");
+ }
+ _opaque = encode(scheme_specific_part, allowed_opaque_part);
+ // Set flag
+ _is_opaque_part = true;
+ setUriReference();
}
@@ -175,9 +191,11 @@
* <p><blockquote><pre>
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
* absoluteURI = scheme ":" ( hier_part | opaque_part )
+ * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
* hier_part = ( net_path | abs_path ) [ "?" query ]
* </pre></blockquote><p>
- * In general, generic URI = <scheme>://<authority><path>?<query>#<fragment>
+ * It's for absolute URI = <scheme>:<path>?<query>#<fragment> and
+ * relative URI = <path>?<query>#<fragment>
*
* @param scheme the scheme string
* @param authority the authority string
@@ -187,44 +205,141 @@
*/
public URI(String scheme, String authority, String path, String query,
String fragment) throws Exception {
- // TODO: validate and contruct the URI character sequence
+ // validate and construct the URI character sequence
+ StringBuffer buff = new StringBuffer();
+ if (scheme != null) {
+ buff.append(scheme);
+ buff.append(':');
+ }
+ if (authority != null) {
+ buff.append("//");
+ buff.append(authority);
+ }
+ if (path != null) { // accept empty path
+ if ((scheme != null || authority != null)
+ && !path.startsWith("/")) {
+ throw new IllegalArgumentException("abs_path requested");
+ }
+ buff.append(path);
+ }
+ if (query != null) {
+ buff.append('?');
+ buff.append(query);
+ }
+ if (fragment != null) {
+ buff.append('#');
+ buff.append(fragment);
+ }
+ parseUriReference(buff.toString());
}
/**
- * Construct a general URI with the given relative URI.
- * <p><blockquote><pre>
- * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
- * </pre></blockquote><p>
+ * Construct a general URI from the given components.
*
- * @param base the base URI
- * @param relative the relative URI
+ * @param scheme the scheme string
+ * @param userinfo the userinfo string
+ * @param host the host string
+ * @param port the port number
+ * @param path the path string
+ * @param query the query string
+ * @param fragment the fragment string
*/
- public URI(URI base, URI relative) throws Exception {
- // TODO: validate and contruct the URI character sequence
+ public URI(String scheme, String userinfo, String host, int port,
+ String path, String query, String fragment)
+ throws Exception {
+ this(scheme, (host == null) ? null :
+ ((userinfo != null) ? userinfo + '@' : "") + host +
+ ((port != -1) ? ":" + port : ""), path, query, fragment);
}
/**
- * Construct a URI from the given components.
+ * Construct a general URI from the given components.
*
* @param scheme the scheme string
- * @param userinfo the userinfo string
* @param host the host string
- * @param port the port number
* @param path the path string
- * @param query the query string
* @param fragment the fragment string
*/
- public URI(String scheme, String userinfo, String host, int port,
- String path, String query, String fragment) throws Exception {
- // TODO: validate and contruct the URI character sequence
+ public URI(String scheme, String host, String path, String fragment)
+ throws Exception {
+ this(scheme, host, path, null, fragment);
}
+ /**
+ * Construct a general URI with the given relative URI.
+ * <p><blockquote><pre>
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
+ * </pre></blockquote><p>
+ * Resolving Relative References to Absolute Form.
+ *
+ * Some URI schemes do not allow a hierarchical syntax matching the
+ * <hier_part> syntax, and thus cannot use relative references.
+ *
+ * @param base the base URI
+ * @param relative the relative URI
+ */
+ public URI(URI base, URI relative) throws Exception {
+ if (base._scheme != null) {
+ throw new IllegalArgumentException("base URI required");
+ }
+ if (relative._scheme != null && // is_relativeURI
+ !equals(base._scheme, relative._scheme)) {
+ throw new IllegalArgumentException("not relative URI");
+ }
+ if (base._is_opaque_part || relative._is_opaque_part) {
+ this._scheme = base._scheme;
+ this._is_opaque_part = relative._is_opaque_part;
+ this._opaque = relative._opaque;
+ this._fragment = relative._fragment;
+ this.setUriReference();
+ return;
+ }
+ if (base._scheme != null) {
+ this._scheme = base._scheme;
+ }
+ if (relative._authority != null) {
+ this._is_net_path = relative._is_net_path;
+ this._authority = relative._authority;
+ if (relative._is_server) {
+ this._is_server = relative._is_server;
+ this._userinfo = relative._userinfo;
+ this._host = relative._host;
+ this._port = relative._port;
+ } else if (relative._is_reg_name) {
+ this._is_reg_name = relative._is_reg_name;
+ }
+ } else if (base._authority != null) {
+ this._is_net_path = base._is_net_path;
+ this._authority = base._authority;
+ if (base._is_server) {
+ this._userinfo = base._userinfo;
+ this._host = base._host;
+ this._port = base._port;
+ } else if (base._is_reg_name) {
+ this._is_reg_name = base._is_reg_name;
+ }
+ }
+ // resolve the path
+ this._path = resolvePath(base._path, relative._path);
+ // base._query removed
+ if (relative._query != null) {
+ this._query = relative._query;
+ }
+ // base._fragment removed
+ if (relative._fragment != null) {
+ this._fragment = relative._fragment;
+ }
+ this.setUriReference();
+ }
+
// --------------------------------------------------- Instance Variables
+ static final long serialVersionUID = 604752400577948726L;
+
/**
* This Uniform Resource Identifier (URI).
@@ -237,7 +352,7 @@
/**
* The default charset of the protocol. RFC 2277, 2396
*/
- protected static String _protocolCharset = "UTF-8";
+ protected static String _protocolCharset = "UTF8";
/**
@@ -259,12 +374,24 @@
/**
+ * The opaque.
+ */
+ protected char[] _opaque = null;
+
+
+ /**
* The authority.
*/
protected char[] _authority = null;
/**
+ * The userinfo.
+ */
+ protected char[] _userinfo = null;
+
+
+ /**
* The host.
*/
protected char[] _host = null;
@@ -294,9 +421,26 @@
protected char[] _fragment = null;
+ /**
+ * The debug.
+ */
+ protected static int debug = 0;
+
// ---------------------- Generous characters for each component validation
/**
+ * The percent "%" character always has the reserved purpose of being the
+ * escape indicator, it must be escaped as "%25" in order to be used as
+ * data within a URI.
+ */
+ protected static final BitSet percent = new BitSet(256);
+ // Static initializer for percent
+ static {
+ percent.set('%');
+ }
+
+
+ /**
* <p><blockquote><pre>
* digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
* "8" | "9"
@@ -368,7 +512,7 @@
protected static final BitSet escaped = new BitSet(256);
// Static initializer for escaped
static {
- escaped.set('%');
+ escaped.or(percent);
escaped.or(hex);
}
@@ -395,6 +539,8 @@
/**
+ * Data characters that are allowed in a URI but do not have a reserved
+ * purpose are called unreserved.
* <p><blockquote><pre>
* unreserved = alphanum | mark
* </pre></blockquote><p>
@@ -683,9 +829,9 @@
protected static final BitSet hostport = new BitSet(256);
// Static initializer for hostport
static {
- host.or(host);
- host.set(':');
- host.or(port);
+ hostport.or(host);
+ hostport.set(':');
+ hostport.or(port);
}
@@ -719,7 +865,7 @@
// Static initializer for server
static {
server.or(userinfo);
- userinfo.set('@');
+ server.set('@');
server.or(hostport);
}
@@ -883,73 +1029,204 @@
URI_reference.or(fragment);
}
+ // ---------------------------- Characters disallowed within the URI syntax
+ // Excluded US-ASCII characters are control, space, delims and unwise
+
+ /**
+ * control
+ */
+ public static final BitSet control = new BitSet(256);
+ // Static initializer for control
+ static {
+ for (int i = 0; i <= 0x1F; i++) {
+ control.set(i);
+ }
+ control.set(0x7F);
+ }
+
+ /**
+ * space
+ */
+ public static final BitSet space = new BitSet(256);
+ // Static initializer for space
+ static {
+ space.set(0x20);
+ }
+
+
+ /**
+ * delims
+ */
+ public static final BitSet delims = new BitSet(256);
+ // Static initializer for delims
+ static {
+ delims.set('<');
+ delims.set('>');
+ delims.set('#');
+ delims.set('%');
+ delims.set('"');
+ }
+
+
+ /**
+ * unwise
+ */
+ public static final BitSet unwise = new BitSet(256);
+ // Static initializer for unwise
+ static {
+ unwise.set('{');
+ unwise.set('}');
+ unwise.set('|');
+ unwise.set('\\');
+ unwise.set('^');
+ unwise.set('[');
+ unwise.set(']');
+ unwise.set('`');
+ }
+
+
+ /**
+ * disallowed rel_segment before escaping
+ */
+ public static final BitSet disallowed_rel_segment = new BitSet(256);
+ // Static initializer for disallowed_rel_segment
+ static {
+ disallowed_rel_segment.or(uric);
+ disallowed_rel_segment.andNot(rel_segment);
+ }
+
+
+ /**
+ * disallowed opaque_part before escaping
+ */
+ public static final BitSet disallowed_opaque_part = new BitSet(256);
+ // Static initializer for disallowed_opaque_part
+ static {
+ disallowed_opaque_part.or(uric);
+ disallowed_opaque_part.andNot(opaque_part);
+ }
+
// ------------------------------- Characters allowed within each component
/**
* Those characters that are allowed within the authority component.
*/
- public static final BitSet allowedAuthority = new BitSet(256);
- // Static initializer for allowedAuthority
+ public static final BitSet allowed_authority = new BitSet(256);
+ // Static initializer for allowed_authority
static {
- // FIXME: you can verify with validate method.
- allowedAuthority.or(unreserved);
- allowedAuthority.or(authority);
+ allowed_authority.or(authority);
+ allowed_authority.clear('%');
}
/**
- * Those characters that are allowed within the path component.
+ * Those characters that are allowed within the opaque_part.
*/
- public static final BitSet allowedPath = new BitSet(256);
- // Static initializer for allowedPath
+ public static final BitSet allowed_opaque_part = new BitSet(256);
+ // Static initializer for allowed_opaque_part
static {
- // FIXME: you can verify with validate method.
- allowedPath.or(unreserved);
- allowedPath.or(path);
+ allowed_opaque_part.or(opaque_part);
+ allowed_opaque_part.clear('%');
}
/**
- * Those characters that are allowed within the query component.
+ * Those characters that are allowed within the reg_name.
*/
- public static final BitSet allowedQuery = new BitSet(256);
- // Static initializer for allowedQuery
+ public static final BitSet allowed_reg_name = new BitSet(256);
+ // Static initializer for allowed_reg_name
static {
- // FIXME: you can verify with validate method.
- allowedQuery.or(unreserved);
+ allowed_reg_name.or(reg_name);
+ // allowed_reg_name.andNot(percent);
+ allowed_reg_name.clear('%');
}
/**
- * Those characters that are allowed within the fragment component.
+ * Those characters that are allowed within the userinfo component.
*/
- public static final BitSet allowedFragment = new BitSet(256);
- // Static initializer for allowedFragment
+ public static final BitSet allowed_userinfo = new BitSet(256);
+ // Static initializer for allowed_userinfo
static {
- // FIXME: you can verify with validate method.
- allowedFragment.or(unreserved);
+ allowed_userinfo.or(userinfo);
+ // allowed_userinfo.andNot(percent);
+ allowed_userinfo.clear('%');
}
/**
- * Those characters that are allowed within the userinfo component.
- * <p><blockquote><pre>
- * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
- * </pre></blockquote><p>
+ * Those characters that are allowed within the IPv6reference component.
+ * '[', ']' in IPv6reference should be excluded.
+ */
+ public static final BitSet allowed_IPv6reference = new BitSet(256);
+ // Static initializer for allowed_IPv6reference
+ static {
+ allowed_IPv6reference.or(IPv6reference);
+ // allowed_IPv6reference.andNot(unwise);
+ allowed_IPv6reference.clear('[');
+ allowed_IPv6reference.clear(']');
+ }
+
+
+ /**
+ * Those characters that are allowed within the host component.
+ * '[', ']' in IPv6reference should be excluded.
+ */
+ public static final BitSet allowed_host = new BitSet(256);
+ // Static initializer for allowed_host
+ static {
+ allowed_host.or(hostname);
+ allowed_host.or(allowed_IPv6reference);
+ }
+
+
+ /**
+ * Those characters that are allowed within the abs_path.
*/
- public static final BitSet allowedUnwise = new BitSet(256);
- // Static initializer for allowedUnwise
+ public static final BitSet allowed_abs_path = new BitSet(256);
+ // Static initializer for allowed_abs_path
static {
- // FIXME: you can verify with validate method.
- allowedUnwise.or(unreserved);
- // allowedUnwise.or(unwise);
+ allowed_abs_path.or(abs_path);
+ // allowed_abs_path.set('/'); // already included
+ allowed_abs_path.clear('%');
+ }
+
+
+ /**
+ * Those characters that are allowed within the rel_segment.
+ */
+ public static final BitSet allowed_rel_segment = new BitSet(256);
+ // Static initializer for allowed_rel_segment
+ static {
+ allowed_rel_segment.or(rel_segment);
+ allowed_rel_segment.clear('%');
+ }
+
+
+ /**
+ * Those characters that are allowed within the query component.
+ */
+ public static final BitSet allowed_query = new BitSet(256);
+ // Static initializer for allowed_query
+ static {
+ allowed_query.or(uric);
+ allowed_query.clear('%');
+ }
+
+
+ /**
+ * Those characters that are allowed within the fragment component.
+ */
+ public static final BitSet allowed_fragment = new BitSet(256);
+ // Static initializer for allowed_fragment
+ static {
+ allowed_fragment.or(uric);
+ allowed_fragment.clear('%');
}
// ------------------------------------------- Flags for this URI-reference
// URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
- protected boolean _is_absoluteURI;
- protected boolean _is_relativeURI;
// absoluteURI = scheme ":" ( hier_part | opaque_part )
protected boolean _is_hier_part;
protected boolean _is_opaque_part;
@@ -959,21 +1236,19 @@
protected boolean _is_abs_path;
protected boolean _is_rel_path;
// net_path = "//" authority [ abs_path ]
- protected boolean _has_authority;
// authority = server | reg_name
protected boolean _is_reg_name;
protected boolean _is_server; // = _has_server
// server = [ [ userinfo "@" ] hostport ]
- protected boolean _has_userinfo;
- // hostport = host [ ":" port ]
- protected boolean _has_hostport;
// host = hostname | IPv4address | IPv6reference
protected boolean _is_hostname;
protected boolean _is_IPv4address;
protected boolean _is_IPv6reference;
+ // query
+ // fragment
+ protected boolean _is_only_fragment;
- // ------------------------------------------------------ Protected methods
-
+ // ------------------------------------------ Character and escape encoding
/**
* This is a two mapping, one from original characters to octets, and
@@ -989,17 +1264,18 @@
* @param original the original character sequence
* @param allowed those characters that are allowed within a component
* @return URI character sequence
- * @exception UnsupportedEncodingException
+ * @exception Exception
+ * if NullPointerException, null argument
+ * if UnsupportedEncodingException, unsupported character encoding
* @see escape
*/
- protected char[] encode(String original, BitSet allowed)
- throws UnsupportedEncodingException {
-
- if (original == null) return null;
+ protected char[] encode(String original, BitSet allowed) throws Exception {
+ // encode original to uri characters.
+ if (original == null) {
+ throw new NullPointerException("original");
+ }
byte[] octet = original.getBytes(_documentCharset);
- // TODO: decode octet to uri characters.
- // new String(octet, _protocolCharset);
- return null;
+ return escape(octet, allowed);
}
@@ -1016,13 +1292,15 @@
*
* @param octet the octet sequence
* @return original character sequence
- * @exception UnsupportedEncodingException
+ * @exception Exception
+ * if NullPointerException, null argument
+ * if UnsupportedEncodingException, unsupported character encoding
+ * if IllegalArgumentException, incomplete trailing escape pattern
* @see unescape
*/
- protected String decode(char[] uri)
- throws UnsupportedEncodingException {
- // TODO: decode octet to uri characters.
- return null;
+ protected String decode(char[] uri) throws Exception {
+ // decode uri to original characters.
+ return new String(unescape(uri), _documentCharset);
}
@@ -1034,16 +1312,42 @@
*
* An escaped octet is encoded as a character triplet, consisting of the
* percent character "%" followed by the two hexadecimal digits
- * representing the octet code. For example, "%20" is the escaped
+ * representing the octet code. For example, "%20" is the escaped
* encoding for the US-ASCII space character.
*
* @param octet the octet sequence to be escaped
* @param allowed those characters that are allowed within a component
* @return URI character sequence
- */
- protected char[] escape(byte[] octet, BitSet allowed) {
- // TODO: escape octet to uri characters.
- return null;
+ * @exception Exception
+ * if NullPointerException, null argument
+ * if UnsupportedEncodingException, unsupported character encoding
+ */
+ protected char[] escape(byte[] octet, BitSet allowed) throws Exception {
+ // escape octet to uri characters.
+ if (octet == null) {
+ throw new NullPointerException("octet");
+ }
+ if (allowed == null) {
+ throw new NullPointerException("allowed characters");
+ }
+ String octets = new String(octet, _protocolCharset);
+ char[] preuric = new char[octets.length()];
+ octets.getChars(0, octets.length(), preuric, 0);
+ StringBuffer buf = new StringBuffer(preuric.length);
+ for (int i = 0; i < preuric.length; i++) {
+ char c = (char) preuric[i];
+ if (allowed.get(c)) {
+ buf.append(c);
+ } else {
+ byte b = (byte) preuric[i];
+ buf.append('%');
+ char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
+ buf.append(hexadecimal);
+ hexadecimal = Character.forDigit(b & 0xF, 16);
+ buf.append(hexadecimal);
+ }
+ }
+ return buf.toString().toCharArray();
}
@@ -1053,30 +1357,103 @@
* URI character sequence->octet sequence
* </pre></blockquote><p>
*
- * the percent "%" character always has the reserved purpose of being
+ * The percent "%" character always has the reserved purpose of being
* the escape indicator, it must be escaped as "%25" in order to be used
* as data within a URI.
*
* @param uri the URI character sequence
* @return octet sequence
+ * @exception Exception
+ * if NullPointerException, null argument
+ * if UnsupportedEncodingException, unsupported character encoding
+ * if IllegalArgumentException, incomplete trailing escape pattern
+ */
+ protected byte[] unescape(char[] uri) throws Exception {
+ // unescape uri characters to octets
+ if (uri == null) {
+ throw new NullPointerException("uri");
+ }
+ byte[] octet = new String(uri).getBytes(_protocolCharset);
+ int oi = 0; // output index
+ for (int ii = 0; ii < uri.length; ) {
+ byte b = (byte) octet[ii++];
+ if (b == '%') {
+ b = (byte) Character.digit(
+ (char) (octet[ii++] << 4 + octet[ii++]), 16);
+ if (b == -1) {
+ throw new IllegalArgumentException(
+ "incomplete trailing escape pattern");
+ }
+ }
+ octet[oi++] = (byte) b;
+ }
+ octet[oi] = (byte) '\0';
+ return octet;
+ }
+
+
+ /**
+ * Pre-validate the unescaped URI string within a specific component.
+ *
+ * @param component the component string within the component
+ * @param disallowed those characters disallowed within the component
+ * @return if true, it doesn't have the disallowed characters
+ * if false, the component is undefined or an incorrect one
+ */
+ protected boolean prevalidate(String component, BitSet disallowed) {
+ // prevalidate the given component by disallowed characters
+ if (component == null) {
+ return false; // undefined
+ }
+ char[] target = component.toCharArray();
+ for (int i = 0; i < target.length; i++) {
+ if (disallowed.get(target[i]))
+ return false;
+ }
+ return true;
+ }
+
+
+ /**
+ * Validate the URI characters within a specific component.
+ * Validation must be performed after escape encoding, or on a component
+ * that does not include escaped characters.
+ *
+ * @param component the characters sequence within the component
+ * @param generous those characters that are allowed within a component
+ * @return if true, it's the correct URI character sequence
*/
- protected byte[] unescape(char[] uri) {
- // TODO: unescape uri characters to octets
- return null;
+ protected boolean validate(char[] component, BitSet generous) {
+ // validate each component by generous characters
+ return validate(component, 0, -1, generous);
}
/**
- * Validate the URI characters within the specific component.
+ * Validate the URI characters within a specific component.
+ * Validation must be performed after escape encoding, or on a component
+ * that does not include escaped characters.
+ *
* It's not that much strict, generous. The strict validation might be
* performed before being called this method.
*
* @param component the characters sequence within the component
+ * @param soffset the starting offset of the given component
+ * @param eoffset the ending offset of the given component
+ * if -1, it means the length of the component
* @param generous those characters that are allowed within a component
- * @return if true, it's the URI character sequence
+ * @return if true, it's the correct URI character sequence
*/
- protected boolean validate(char[] component, BitSet generous) {
- // TODO: validate each component with generous characters
+ protected boolean validate(char[] component, int soffset, int eoffset,
+ BitSet generous) {
+ // validate each component by generous characters
+ if (eoffset == -1) {
+ eoffset = component.length;
+ }
+ for (int i = soffset; i < eoffset; i++) {
+ if (!generous.get(component[i]))
+ return false;
+ }
return true;
}
@@ -1112,6 +1489,11 @@
* @exception Exception
*/
protected void parseUriReference(String original) throws Exception {
+ // validate and construct the URI character sequence
+
+ if (original == null) {
+ throw new IllegalArgumentException("URI-Reference required");
+ }
/** @
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
@@ -1127,23 +1509,12 @@
}
/**
- * The index to start the search from.
- */
- int from = 0;
-
- /**
* <p><blockquote><pre>
* @@@@@@@@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- int at = tmp.indexOf(":/?#", from);
-
- /**
- * The next index to start the search to.
- * If it's not -1, it's the index to be stopped.
- */
- int next = -1;
+ int at = tmp.indexOf(":/?#");
/**
* <p><blockquote><pre>
@@ -1152,13 +1523,12 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- _is_absoluteURI = false;
if (at > 0 && tmp.charAt(at) == ':') {
- _scheme = tmp.substring(0, at).trim().toLowerCase().toCharArray();
- // Set flag
- _is_absoluteURI = true;
+ char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
+ if (validate(target, scheme)) {
+ _scheme = target;
+ }
}
- _is_relativeURI = !_is_absoluteURI;
/**
* The length of the sequence of characters.
@@ -1173,31 +1543,55 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- _is_net_path = false;
- _is_abs_path = false;
- _is_rel_path = false;
- _is_hier_part = false;
- if (tmp.charAt(at) == '/') {
+ // Reset flags
+ _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
+ if (at >= 0 && tmp.charAt(at) == '/') {
// Set flag
_is_hier_part = true;
- if (at+1 < length && tmp.charAt(at+1) == '/') {
- from = at + 2;
- next = tmp.indexOf("/?#", from); // at, if not -1
- parseAuthority((next > 0) ? tmp.substring(from, next) :
- tmp.substring(from));
+ if (at+2 < length && tmp.charAt(at+1) == '/') {
+ // the temporary index to start the search from
+ int from = at + 2;
+ int next = tmp.indexOf("/?#", from); // at, if not -1
+ if (next == -1) {
+ next = tmp.length();
+ }
+ parseAuthority(tmp.substring(from, next));
+ at = next;
// Set flag
_is_net_path = true;
- } else {
+ }
+ if (tmp.charAt(at) == '/') {
// Set flag
_is_abs_path = true;
}
- } else if (_is_relativeURI) {
- // Set flag
- _is_rel_path = true;
} else {
- // REMINDME: never here or throw an Exception
+ if (_scheme == null) { // is_relativeURI
+ // rel_path = rel_segment [ abs_path ]
+ int next = tmp.indexOf('/');
+ if (next == -1) {
+ next = tmp.length();
+ }
+ // validating before escape encoding
+ if (prevalidate(tmp.substring(at, next),
+ disallowed_rel_segment)) {
+ // Set flag
+ _is_rel_path = true;
+ }
+ // REMINDME: let us skip the rest of abs_path to validate
+ } else { // is_absoluteURI
+ // validating before escape encoding
+ if (prevalidate(tmp.substring(at), disallowed_opaque_part)) {
+ // Set flag
+ _is_opaque_part = true;
+ }
+ }
+ if (!_is_rel_path || !_is_opaque_part) {
+ // correct validation. possibly, only fragment.
+ // is_relativeURI and is_absoluteURI must be false
+ // Set flag
+ _is_only_fragment = true;
+ }
}
- _is_opaque_part = !_is_hier_part;
/**
* <p><blockquote><pre>
@@ -1207,10 +1601,13 @@
* </pre></blockquote><p>
*/
if (tmp.charAt(at) != '?' && tmp.charAt(at) != '#') {
- from = at;
- next = tmp.indexOf("?#", from);
- _path = encode(tmp.substring(from, next), allowedPath);
- if (next > 0) at = next;
+ int from = at;
+ int next = tmp.indexOf("?#", from);
+ if (next == -1) {
+ next = tmp.length();
+ }
+ setPath(tmp.substring(from, next));
+ at = next;
}
/**
@@ -1220,11 +1617,13 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- if (tmp.charAt(at) == '?') {
- from = at + 1;
- next = tmp.indexOf('#', from);
- _query = encode(tmp.substring(from, next), allowedQuery);
- if (next > 0) at = next;
+ if (at+1 < length && tmp.charAt(at) == '?') {
+ int from = at + 1;
+ int next = tmp.indexOf('#', from);
+ if (next != -1) {
+ _query = encode(tmp.substring(from, next), allowed_query);
+ at = next;
+ }
}
/**
@@ -1234,30 +1633,135 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- if (tmp.charAt(at) == '#') {
- from = at + 1;
- _fragment = encode(tmp.substring(from), allowedFragment);
+ if (at+1 < length && tmp.charAt(at) == '#') {
+ int from = at + 1;
+ _fragment = encode(tmp.substring(from), allowed_fragment);
}
+
+ // set this URI.
+ setUriReference();
}
/**
- * Parse the authority part.
+ * Parse the authority component.
*
- * @param original the original character sequence
- * @return the original character sequence
+ * @param original the original character sequence of authority component
* @exception Exception
+ * if NumberFormatException, port isn't integer
+ * if IllegalArgumentException, incorrect IPv6reference or wrong host
*/
protected void parseAuthority(String original) throws Exception {
- // TODO: validate the authroity part and
- // confirm the component within the authority part.
- //_authority = encode(original, allowedAuthority);
- // TODO: Set reg_name, server, userinfo, hostport
- // TODO: _is_reg_name, _is_server, _has_userinfo, _is_hostport
- // _is_hostname, _is_IPv4address, _is_IPv6reference
+ // Reset flags
+ _is_reg_name = _is_server =
+ _is_hostname = _is_IPv4address = _is_IPv6reference = false;
+
+ int from = 0;
+ int next = original.indexOf('@');
+ if (next != -1) { // neither -1 and 0
+ // if next == 0, for example, in ftp, userinfo = 'anonymous'
+ // each protocol extended from URI supports the specific userinfo
+ _userinfo = encode(original.substring(0, next), allowed_userinfo);
+ from = next + 1;
+ }
+ next = original.indexOf('[', from);
+ if (next >= from) {
+ next = original.indexOf(']', from);
+ if (next == -1) {
+ throw new IllegalArgumentException("IPv6reference");
+ } else {
+ next++;
+ }
+ // In IPv6reference, '[', ']' should be excluded
+ _host = encode(original.substring(from, next),
+ allowed_IPv6reference);
+ // Set flag
+ _is_IPv6reference = true;
+ } else { // only for !_is_IPv6reference
+ next = original.indexOf(':', from);
+ if (next == -1) {
+ next = original.length();
+ }
+ if (validate(_host, IPv4address)) {
+ _host = original.substring(from, next).toCharArray();
+ // Set flag
+ _is_IPv4address = true;
+ } else if (validate(_host, hostname)) {
+ _host = original.substring(from, next).toCharArray();
+ // Set flag
+ _is_hostname = true;
+ } else {
+ // Set flag
+ _is_reg_name = true;
+ }
+ }
+ if (_is_reg_name) {
+ // Reset flags for a server-based naming authority
+ _is_server = _is_hostname = _is_IPv4address =
+ _is_IPv6reference = false;
+ // set a registry-based naming authority
+ _authority = encode(original.toString(), allowed_reg_name);
+ } else {
+ if (original.charAt(next) == ':') {
+ from = next + 1;
+ _port = Integer.parseInt(original.substring(from));
+ }
+ // set a server-based naming authority
+ StringBuffer buf = new StringBuffer();
+ if (_userinfo != null) { // has_userinfo
+ buf.append(_userinfo);
+ buf.append('@');
+ }
+ if (_host != null) {
+ buf.append(_host);
+ if (_port != -1) {
+ buf.append(':');
+ buf.append(_port);
+ }
+ }
+ _authority = buf.toString().toCharArray();
+ // Set flag
+ _is_server = true;
+ }
}
+ /**
+ * Once it's parsed successfully, set this URI.
+ *
+ * @see getRawURI
+ */
+ protected void setUriReference() throws Exception {
+ // set _uri
+ StringBuffer buf = new StringBuffer();
+ // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+ if (_scheme != null) {
+ buf.append(_scheme);
+ buf.append(':');
+ }
+ if (_is_net_path) {
+ buf.append("//");
+ if (_authority != null) { // has_authority
+ buf.append(_authority);
+ }
+ }
+ if (_opaque != null && _is_opaque_part) {
+ buf.append(_opaque);
+ } else if (_path != null) { // _is_hier_part or _is_relativeURI
+ buf.append(_path);
+ if (_query != null) { // has_query
+ buf.append('?');
+ buf.append(_query);
+ }
+ }
+ if (_fragment != null) { // has_fragment
+ buf.append('#');
+ buf.append(_fragment);
+ }
+
+ _uri = buf.toString().toCharArray();
+ }
+
// --------------------------------------------------------- Public methods
@@ -1267,7 +1771,7 @@
* @return true iif this URI is absoluteURI
*/
public boolean is_absoluteURI() {
- return _is_absoluteURI;
+ return _is_only_fragment ? false : (_scheme != null);
}
@@ -1277,7 +1781,7 @@
* @return true iif this URI is relativeURI
*/
public boolean is_relativeURI() {
- return _is_relativeURI;
+ return _is_only_fragment ? false : (_scheme == null);
}
@@ -1303,11 +1807,13 @@
/**
* Tell whether or not this URI is net_path.
+ * It's the same function as the has_authority() method.
*
* @return true iif this URI is net_path
+ * @see has_authority
*/
public boolean is_net_path() {
- return _is_net_path;
+ return _is_net_path || (_authority != null);
}
@@ -1333,11 +1839,13 @@
/**
* Tell whether or not this URI has authority.
+ * It's the same function as the is_net_path() method.
*
* @return true iif this URI has authority
+ * @see is_net_path
*/
public boolean has_authority() {
- return _has_authority;
+ return (_authority != null) || _is_net_path;
}
/**
@@ -1366,21 +1874,11 @@
* @return true iif this URI has userinfo
*/
public boolean has_userinfo() {
- return _has_userinfo;
+ return (_userinfo != null);
}
/**
- * Tell whether or not this URI has hostport.
- *
- * @return true iif this URI has hostport
- */
- public boolean has_hostport() {
- return _has_hostport;
- }
-
-
- /**
* Tell whether or not this URI is hostname.
*
* @return true iif this URI is hostname
@@ -1408,6 +1906,27 @@
public boolean is_IPv6reference() {
return _is_IPv6reference;
}
+
+
+ /**
+ * Tell whether or not this URI has query.
+ *
+ * @return true iif this URI has query
+ */
+ public boolean has_query() {
+ return (_query != null);
+ }
+
+
+ /**
+ * Tell whether or not this URI has fragment.
+ *
+ * @return true iif this URI has fragment
+ */
+ public boolean has_fragment() {
+ return (_fragment != null);
+ }
+
// ---------------------------------------------------------------- Charset
@@ -1431,7 +1950,7 @@
* An individual URI scheme may require a single charset, define a default
* charset, or provide a way to indicate the charset used.
*
- * @return charset
+ * @return the charset string
*/
public String getProtocolCharset() {
return _protocolCharset;
@@ -1451,7 +1970,7 @@
/**
* Get the default charset of the document.
*
- * @return charset
+ * @return the charset string
*/
public String getDocumentCharset() {
return _documentCharset;
@@ -1463,24 +1982,57 @@
* Get the scheme.
*
* @return the scheme
- * @exception Exception
- * UnsupportedEncodingException
*/
- public String getScheme() throws Exception {
- return decode(_scheme);
+ public char[] getRawScheme() {
+ return _scheme;
+ }
+
+
+ /**
+ * Get the scheme.
+ *
+ * @return the scheme
+ */
+ public String getScheme() {
+ return new String(_scheme);
}
// ---------------------------------------------------------- The authority
/**
- * Set the authority.
+ * Set the authority. It can be one type of server, hostport, hostname,
+ * IPv4address, IPv6reference and reg_name.
+ * <p><blockquote><pre>
+ * authority = server | reg_name
+ * </pre></blockquote><p>
*
* @param the authority
* @exception Exception
* UnsupportedEncodingException
*/
public void setAuthority(String authority) throws Exception {
- _authority = encode(authority, allowedAuthority);
+ parseAuthority(authority);
+ setUriReference();
+ }
+
+
+ /**
+ * Get the raw-escaped authority.
+ *
+ * @return the raw-escaped authority
+ */
+ public char[] getRawAuthority() {
+ return _authority;
+ }
+
+
+ /**
+ * Get the escaped authority.
+ *
+ * @return the escaped authority
+ */
+ public String getEscapedAuthority() {
+ return new String(_authority);
}
@@ -1489,55 +2041,89 @@
*
* @return the authority
* @exception Exception
- * UnsupportedEncodingException
+ * @see decode
*/
public String getAuthority() throws Exception {
return decode(_authority);
}
- // --------------------------------------------------------------- The host
+ // ----------------------------------------------------------- The userinfo
+
+ /**
+ * Get the raw-escaped userinfo.
+ *
+ * @return the raw-escaped userinfo
+ * @see getAuthority
+ */
+ public char[] getRawUserinfo() {
+ return _userinfo;
+ }
+
+
+ /**
+ * Get the escaped userinfo.
+ *
+ * @return the escaped userinfo
+ * @see getAuthority
+ */
+ public String getEscapedUserinfo() {
+ return new String(_userinfo);
+ }
+
/**
- * Set the host.
+ * Get the userinfo.
*
- * @param the host
+ * @return the userinfo
* @exception Exception
- * UnsupportedEncodingException
+ * @see decode
+ * @see getAuthority
*/
- public void setHost(String host) throws Exception {
- // Support the non-ASCII host configuration
- _host = encode(host, this.host); // Notice that there isn't allowedHost
+ public String getUserinfo() throws Exception {
+ return decode(_userinfo);
}
+ // --------------------------------------------------------------- The host
/**
* Get the host.
+ * <p><blockquote><pre>
+ * host = hostname | IPv4address | IPv6reference
+ * </pre></blockquote><p>
*
* @return the host
- * @exception Exception
- * UnsupportedEncodingException
+ * @see getAuthority
*/
- public String getHost() throws Exception {
- // Support the non-ASCII host configuration
- return decode(_host);
+ public char[] getRawHost() {
+ return _host;
}
- // --------------------------------------------------------------- The port
/**
- * Set the port.
+ * Get the host.
+ * <p><blockquote><pre>
+ * host = hostname | IPv4address | IPv6reference
+ * </pre></blockquote><p>
*
- * @param the port
+ * @return the host
+ * @exception Exception
+ * @see decode
+ * @see getAuthority
*/
- public void setPort(int port) {
- _port = port;
+ public String getHost() throws Exception {
+ return decode(_host);
}
+ // --------------------------------------------------------------- The port
/**
- * Get the port.
+ * Get the port. In order to get the specific default port, the specific
+ * protocol-supported class extended from the URI class should be used.
+ * It has the server-based naming authority.
*
* @return the port
+ * if -1, it has the default port for the scheme or the server-based
+ * naming authority is not supported in the specific URI.
*/
public int getPort() {
return _port;
@@ -1546,39 +2132,249 @@
// --------------------------------------------------------------- The path
/**
- * Set the path.
+ * Set the path. This method is not meant to be called by API programmers.
*
- * @param the path string
+ * @param path the path string
* @exception Exception
- * UnsupportedEncodingException
+ * if IllegalArgumentException, set incorrectly or fragment only
+ * @see encode
*/
- public void setPath(String path) throws Exception {
- _path = encode(path, allowedPath);
+ protected void setPath(String path) throws Exception {
+ // set path
+ if (_is_net_path || _is_abs_path) {
+ _path = encode(path, allowed_abs_path);
+ } else if (_is_rel_path) {
+ StringBuffer buff = new StringBuffer(path.length());
+ int at = path.indexOf('/');
+ if (at > 0) { // never 0
+ buff.append(encode(path.substring(0, at), allowed_rel_segment));
+ buff.append(encode(path.substring(at), allowed_abs_path));
+ } else {
+ buff.append(encode(path, allowed_rel_segment));
+ }
+ _path = buff.toString().toCharArray();
+ } else if (_is_opaque_part) {
+ _opaque = encode(path, allowed_opaque_part);
+ } else {
+ throw new IllegalArgumentException("incorrect path");
+ }
}
/**
- * Get the path.
+ * Resolve the base and relative path.
*
- * @return the path string
+ * @param base_path a character array of the base_path
+ * @param rel_path a character array of the rel_path
+ * @return the resolved path
+ */
+ protected char[] resolvePath(char[] base_path, char[] rel_path) {
+ // REMINDME: paths are never null
+ String base = new String(base_path);
+ int at = base.lastIndexOf('/');
+ if (at != -1) {
+ base_path = base.substring(0, at + 1).toCharArray();
+ }
+ // _path could be empty
+ if (rel_path.length == 0) {
+ return normalize(base_path);
+ } else if (rel_path[0] == '/') {
+ return rel_path;
+ } else {
+ StringBuffer buff = new StringBuffer(base.length() +
+ rel_path.length);
+ if (at != -1) {
+ buff.append(base.substring(0, at + 1));
+ buff.append(rel_path);
+ }
+ return normalize(buff.toString().toCharArray());
+ }
+ }
+
+
+ /**
+ * Get the raw-escaped current hierarchy level in the given path.
+ *
+ * @param path the path
+ * @return the current hierarchy level
* @exception Exception
- * UnsupportedEncodingException
+ * if IllegalArgumentException, no hierarchy level
+ * if NullPointerException, null argument
*/
- public String getPath() throws Exception {
- return decode(_path);
+ protected char[] getRawCurrentHierPath(char[] path) throws Exception {
+ if (_is_opaque_part) {
+ throw new IllegalArgumentException("no hierarchy level");
+ }
+ if (path == null) {
+ throw new NullPointerException("null argument");
+ }
+ String buff = new String(path);
+ int first = buff.indexOf('/');
+ int last = buff.lastIndexOf('/');
+ if (first != last && last != -1) {
+ return buff.substring(0, last).toCharArray();
+ }
+ // FIXME: it could be a document on the server side
+ return path;
+ }
+
+
+ /**
+ * Get the raw-escaped current hierarchy level.
+ *
+ * @return the raw-escaped current hierarchy level
+ * @exception Exception
+ */
+ public char[] getRawCurrentHierPath() throws Exception {
+ return getRawCurrentHierPath(_path);
+ }
+
+
+ /**
+ * Get the escaped current hierarchy level.
+ *
+ * @return the escaped current hierarchy level
+ * @exception Exception
+ */
+ public String getEscapedCurrentHierPath() throws Exception {
+ return new String(getRawCurrentHierPath());
+ }
+
+
+ /**
+ * Get the current hierarchy level.
+ *
+ * @return the current hierarchy level
+ * @exception Exception
+ * @see decode
+ */
+ public String getCurrentHierPath() throws Exception {
+ return decode(getRawCurrentHierPath());
+ }
+
+
+ /**
+ * Get the level above this hierarchy level.
+ *
+ * @return the raw above hierarchy level
+ * @exception Exception
+ */
+ public char[] getRawAboveHierPath() throws Exception {
+ return getRawCurrentHierPath(getRawCurrentHierPath());
+ }
+
+
+ /**
+ * Get the level above this hierarchy level.
+ *
+ * @return the escaped above hierarchy level
+ * @exception Exception
+ */
+ public String getEscapedAboveHierPath() throws Exception {
+ return new String(getRawAboveHierPath());
+ }
+
+
+ /**
+ * Get the level above this hierarchy level.
+ *
+ * @return the above hierarchy level
+ * @exception Exception
+ * @see decode
+ */
+ public String getAboveHierPath() throws Exception {
+ return decode(getRawAboveHierPath());
+ }
+
+
+ /**
+ * Get the raw-escaped path.
+ * <p><blockquote><pre>
+ * path = [ abs_path | opaque_part ]
+ * </pre></blockquote><p>
+ * @return the raw-escaped path
+ */
+ public char[] getRawPath() {
+ return _is_opaque_part ? _opaque : _path;
}
/**
* Get the escaped path.
+ * <p><blockquote><pre>
+ * path = [ abs_path | opaque_part ]
+ * abs_path = "/" path_segments
+ * opaque_part = uric_no_slash *uric
+ * </pre></blockquote><p>
*
* @return the escaped path string
*/
public String getEscapedPath() {
- return new String(_path);
+ return new String(_is_opaque_part ? _opaque :_path);
}
+ /**
+ * Get the path.
+ * <p><blockquote><pre>
+ * path = [ abs_path | opaque_part ]
+ * </pre></blockquote><p>
+ * @return the path string
+ * @exception Exception
+ * @see decode
+ */
+ public String getPath() throws Exception {
+ return decode(getRawPath());
+ }
+
+ // ----------------------------------------------------- The path and query
+
+ /**
+ * Get the raw-escaped path and query.
+ *
+ * @return the raw-escaped path and query
+ * @exception NullPointerException path undefined
+ */
+ public char[] getRawPathQuery() {
+ if (_path == null) {
+ throw new NullPointerException("path undefined");
+ }
+ int len = _path.length;
+ if (_query != null) {
+ len += 1 + _query.length;
+ }
+ StringBuffer buff = new StringBuffer(len);
+ buff.append(_path);
+ if (_query != null) {
+ buff.append('?');
+ buff.append(_query);
+ }
+ return buff.toString().toCharArray();
+ }
+
+
+ /**
+ * Get the escaped path and query.
+ *
+ * @return the escaped path and query string
+ * @exception NullPointerException path undefined
+ */
+ public String getEscapedPathQuery() {
+ return new String(getRawPathQuery());
+ }
+
+
+ /**
+ * Get the path and query.
+ *
+ * @return the path and query string.
+ * @exception Exception
+ * @see decode
+ */
+ public String getPathQuery() throws Exception {
+ return decode(getRawPathQuery());
+ }
+
// -------------------------------------------------------------- The query
/**
@@ -1586,100 +2382,341 @@
*
* @param the query string.
* @exception Exception
- * UnsupportedEncodingException
+ * @see encode
*/
public void setQuery(String query) throws Exception {
- _query = encode(query, allowedQuery);
+ _query = encode(query, allowed_query);
+ setUriReference();
}
/**
- * Get the query.
+ * Get the raw-escaped query.
*
- * @return the query string.
- * @exception Exception
- * UnsupportedEncodingException
+ * @return the raw-escaped query
*/
- public String getQuery() throws Exception {
- return decode(_query);
+ public char[] getRawQuery() {
+ return _query;
}
/**
* Get the escaped query.
*
- * @return the escaped query string.
+ * @return the escaped query string
*/
public String getEscapedQuery() {
return new String(_query);
}
+
+ /**
+ * Get the query.
+ *
+ * @return the query string.
+ * @exception Exception
+ * @see decode
+ */
+ public String getQuery() throws Exception {
+ return decode(_query);
+ }
+
// ----------------------------------------------------------- The fragment
/**
* Set the fragment.
*
+ * An empty URI reference represents the base URI of the current document
+ * and should be replaced by that URI when transformed into a request.
+ *
* @param the fragment string.
* @exception Exception
* UnsupportedEncodingException
*/
public void setFragment(String fragment) throws Exception {
- _fragment = encode(fragment, allowedFragment);
+ _fragment = encode(fragment, allowed_fragment);
+ setUriReference();
}
/**
- * Get the fragment.
+ * Get the raw-escaped fragment.
*
- * @return the fragment string.
- * @exception Exception
- * UnsupportedEncodingException
+ * The optional fragment identifier is not part of a URI, but is often used
+ * in conjunction with a URI.
+ *
+ * The format and interpretation of fragment identifiers is dependent on
+ * the media type [RFC2046] of the retrieval result.
+ *
+ * A fragment identifier is only meaningful when a URI reference is
+ * intended for retrieval and the result of that retrieval is a document
+ * for which the identified fragment is consistently defined.
+ *
+ * @return the raw-escaped fragment
*/
- public String getFragment() throws Exception {
- return decode(_fragment);
+ public char[] getRawFragment() {
+ return _fragment;
}
+
/**
* Get the escaped fragment.
*
- * @return the escaped fragment string.
+ * @return the escaped fragment string
*/
public String getEscapedFragment() {
return new String(_fragment);
}
+
+ /**
+ * Get the fragment.
+ *
+ * @return the fragment string
+ * @exception Exception
+ * @see decode
+ */
+ public String getFragment() throws Exception {
+ return decode(_fragment);
+ }
+
// ------------------------------------------------------------- Utilities
/**
- * Normalize this URI.
+ * Normalize the given hier path part.
+ *
+ * @param path the path to normalize
+ * @return the normalized path
+ */
+ protected char[] normalize(char[] path) {
+ if (path == null) {
+ return null;
+ }
+ String normalized = new String(path);
+ // precondition
+ if (!normalized.endsWith("/")) {
+ normalized += '/';
+ }
+ // Resolve occurrences of "/./" in the normalized path
+ while (true) {
+ int at = normalized.indexOf("/./");
+ if (at == -1) {
+ break;
+ }
+ normalized = normalized.substring(0, at) +
+ normalized.substring(at + 2);
+ }
+ // Resolve occurrences of "/../" in the normalized path
+ while (true) {
+ int at = normalized.indexOf("/../");
+ if (at == -1) {
+ break;
+ }
+ if (at == 0) {
+ normalized = "/";
+ break;
+ }
+ int backward = normalized.lastIndexOf('/', at - 1);
+ if (backward == -1) {
+ // consider the rel_path
+ normalized = normalized.substring(at + 4);
+ } else {
+ normalized = normalized.substring(0, backward) +
+ normalized.substring(at + 3);
+ }
+ }
+ // Resolve occurrences of "//" in the normalized path
+ while (true) {
+ int at = normalized.indexOf("//");
+ if (at == -1) {
+ break;
+ }
+ normalized = normalized.substring(0, at) +
+ normalized.substring(at + 1);
+ }
+ // Set the normalized path that we have completed
+ return normalized.toCharArray();
+ }
+
+
+ /**
+ * Normalize the path part of this URI.
+ */
+ public void normalize() {
+ _path = normalize(_path);
+ }
+
+
+ /**
+ * Set debug mode
*
- * @return the normalized URI
+ * @param level the level of debug mode
*/
- public URI normalize() {
- // TODO: normalize and return URI
- return null;
+ public void setDebug(int level) {
+ debug = level;
}
+
/**
- * Test a object if this is equal with another.
+ * Test if the first array is equal to the second array.
+ *
+ * @param first the first character array
+ * @param second the second character array
+ * @return true if they're equal
+ */
+ protected boolean equals(char[] first, char[] second) {
+ if (first == null && second == null) {
+ return true;
+ }
+ if (first == null || second == null) {
+ return false;
+ }
+ if (first.length != second.length) {
+ return false;
+ }
+ for (int i = 0; i < first.length; i++) {
+ if (first[i] != second[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+ /**
+ * Test an object if this URI is equal to another.
*
* @param obj an object to compare
* @return true if two URI objects are equal
*/
public boolean equals(Object obj) {
- // TODO: normalize and test each components
- return false;
+ // normalize and test each components
+ if (obj == this) {
+ return true;
+ }
+ if (!(obj instanceof URI)) {
+ return false;
+ }
+ URI another = (URI) obj;
+ // scheme
+ if (!equals(_scheme, another._scheme)) {
+ return false;
+ }
+ // is_opaque_part or is_hier_part? and opaque
+ if (!equals(_opaque, another._opaque)) {
+ return false;
+ }
+ // is_hier_part
+ // has_authority
+ if (!equals(_authority, another._authority)) {
+ return false;
+ }
+ // path
+ if (!equals(_path, another._path)) {
+ return false;
+ }
+ // has_query
+ if (!equals(_query, another._query)) {
+ return false;
+ }
+ // has_fragment? should be careful of the only fragment case.
+ if (!equals(_fragment, another._fragment)) {
+ return false;
+ }
+ return true;
+ }
+
+ // ---------------------------------------------------------- Serialization
+
+ /**
+ * Write the content of this URI.
+ *
+ * @param oos the object-output stream
+ */
+ protected void writeObject(java.io.ObjectOutputStream oos)
+ throws IOException {
+ oos.defaultWriteObject();
}
+
+ /**
+ * Read a URI.
+ *
+ * @param ois the object-input stream
+ */
+ protected void readObject(java.io.ObjectInputStream ois)
+ throws ClassNotFoundException, IOException {
+ ois.defaultReadObject();
+ }
+
+ // ------------------------------------------------------------- Comparison
+
+ /**
+ * Compare this URI to another object.
+ *
+ * @param obj the object to be compared.
+ * @return 0, if it's same
+ * @exception ClassCastException not URI argument
+ * @exception NullPointerException character encoding error or null object
+ */
+ public int compareTo(Object obj) {
+ URI another = (URI) obj;
+ return toString().compareTo(another.toString());
+ }
+
+ // ------------------------------------------------------------ Get the URI
+
+ /**
+ * Get the URI character sequence. It's raw-escaped.
+ * For the purpose of the protocol to be transported, it will be useful.
+ *
+ * @return URI character sequence
+ */
+ public char[] getRawURI() {
+ return _uri;
+ }
+
+
+ /**
+ * Get the URI character sequence. It's escaped.
+ * For the purpose of the protocol to be transported, it will be useful.
+ *
+ * @return the URI string
+ */
+ public String getEscapedURI() {
+ return new String(_uri);
+ }
+
/**
- * Get the escaped URI string.
- * For the purpose of the protocol to be transported, it's useful.
+ * Get the URI character sequence.
+ *
+ * @return the URI string
+ * @exception Exception
+ * @see decode
+ */
+ public String getURI() throws Exception {
+ return decode(_uri);
+ }
+
+
+ /**
+ * Get the escaped URI string. It doesn't throw any exception.
+ * However, if there is an error, null is returned.
*
* @return the escaped URI string
+ * if null, error
*/
public String toString() {
- return new String(_uri);
+ String s = null;
+ try {
+ s = getURI();
+ } catch (Throwable t) {
+ if (debug > 0) {
+ t.getMessage();
+ }
+ }
+ return s;
}
}
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>