You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@avalon.apache.org by br...@apache.org on 2003/06/07 22:59:40 UTC
cvs commit: avalon-excalibur/sourceresolve/src/test/org/apache/excalibur/source/test SourceUtilTestCase.java
bruno 2003/06/07 13:59:40
Modified: sourceresolve/src/java/org/apache/excalibur/source
SourceUtil.java
sourceresolve/src/test/org/apache/excalibur/source/test
SourceUtilTestCase.java
Log:
Added a few methods to SourceUtil to "absolutize" URLs (following RFC 2396).
Revision Changes Path
1.9 +243 -25 avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source/SourceUtil.java
Index: SourceUtil.java
===================================================================
RCS file: /home/cvs/avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source/SourceUtil.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- SourceUtil.java 7 Jun 2003 18:28:36 -0000 1.8
+++ SourceUtil.java 7 Jun 2003 20:59:40 -0000 1.9
@@ -59,6 +59,11 @@
import java.util.Iterator;
import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.avalon.framework.CascadingRuntimeException;
+import org.apache.regexp.REProgram;
+import org.apache.regexp.RECompiler;
+import org.apache.regexp.RESyntaxException;
+import org.apache.regexp.RE;
/**
*
@@ -322,7 +327,7 @@
}
return null;
}
-
+
/**
* Move the source to a specified destination.
*
@@ -333,25 +338,25 @@
* the move.
*/
static public void move(Source source,
- Source destination)
- throws SourceException
+ Source destination)
+ throws SourceException
{
if (source instanceof MoveableSource
&& source.getClass().equals(destination.getClass()))
{
((MoveableSource)source).moveTo(destination);
- }
- else if (source instanceof ModifiableSource)
+ }
+ else if (source instanceof ModifiableSource)
{
copy(source, destination);
((ModifiableSource) source).delete();
- }
- else
+ }
+ else
{
throw new SourceException("Source '"+source.getURI()+ "' is not writeable");
}
}
-
+
/**
* Get the position of the scheme-delimiting colon in an absolute URI, as specified
* by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, appendix A. This method is
@@ -361,7 +366,7 @@
* Use this method when you need both the scheme and the scheme-specific part of an URI,
* as calling successively {@link #getScheme(String)} and {@link #getSpecificPart(String)}
* will call this method twice, and as such won't be efficient.
- *
+ *
* @param uri the URI
* @return int the scheme-delimiting colon, or <code>-1</code> if not found.
*/
@@ -389,7 +394,7 @@
{
return -1;
}
-
+
// Check that first character is alpha
// (lowercase first since it's the most common case)
char ch = uri.charAt(0);
@@ -399,7 +404,7 @@
// Invalid first character
return -1;
}
-
+
int pos = uri.indexOf(':');
if (pos != -1)
{
@@ -417,13 +422,13 @@
}
}
}
-
+
return pos;
}
-
+
/**
* Get the scheme of an absolute URI.
- *
+ *
* @param uri the absolute URI
* @return the URI scheme
*/
@@ -432,12 +437,12 @@
int pos = indexOfSchemeColon(uri);
return (pos == -1) ? null : uri.substring(0, pos);
}
-
+
/**
* Get the scheme-specific part of an absolute URI. Note that this includes everything
* after the separating colon, including the fragment, if any (RFC 2396 separates it
* from the scheme-specific part).
- *
+ *
* @param uri the absolute URI
* @return the scheme-specific part of the URI
*/
@@ -457,14 +462,14 @@
* the copy.
*/
static public void copy(Source source,
- Source destination)
+ Source destination)
throws SourceException {
- if (source instanceof MoveableSource
- && source.getClass().equals(destination.getClass()))
+ if (source instanceof MoveableSource
+ && source.getClass().equals(destination.getClass()))
{
((MoveableSource) source).copyTo(destination);
- }
- else
+ }
+ else
{
if ( !(destination instanceof ModifiableSource)) {
throw new SourceException("Source '"+
@@ -475,7 +480,7 @@
try {
OutputStream out = ((ModifiableSource) destination).getOutputStream();
InputStream in = source.getInputStream();
-
+
copy(in, out);
} catch (IOException ioe) {
throw new SourceException("Could not copy source '"+
@@ -485,10 +490,10 @@
}
}
}
-
+
/**
* Copy the contents of an <code>InputStream</code> to an <code>OutputStream</code>.
- *
+ *
* @param in
* @param out
* @throws IOException
@@ -504,6 +509,219 @@
in.close();
out.flush();
out.close();
+ }
+
+ private static final String urlregexp = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; // URL-splitting pattern; same expression as quoted in the parseUrl Javadoc
+ private static final REProgram urlREProgram; // compiled form of urlregexp, assigned exactly once by the static initializer below
+ static
+ {
+ RECompiler compiler = new RECompiler();
+ try
+ {
+ urlREProgram = compiler.compile(urlregexp);
+ }
+ catch (RESyntaxException e)
+ {
+ throw new CascadingRuntimeException("SourceUtil: could not compile urlregexp pattern", e); // rethrown unchecked from the initializer, passing the syntax error along as the cause
+ }
+ }
+
+ public static final int SCHEME = 2; // index of the scheme (without ':') in the array returned by parseUrl
+ private static final int AUTHORITY_WITH_PRECEDING_SLASHES = 3; // index of the authority including its leading "//"
+ public static final int AUTHORITY = 4; // index of the authority without the leading "//"
+ public static final int PATH = 5; // index of the path
+ public static final int QUERY = 7; // index of the query (without the leading '?')
+ public static final int FRAGMENT = 9; // index of the fragment (without the leading '#')
+
+    /** Resolves url2 against the base url1 without treating the authority as part of the path. */
+    public static String absolutize(String url1, String url2)
+    {
+        // Same as the three-argument overload with the Cocoon-specific flag switched off.
+        final boolean treatAuthorityAsBelongingToPath = false;
+        return absolutize(url1, url2, treatAuthorityAsBelongingToPath);
+    }
+
+    /**
+     * Applies a location to a baseURI, as described in RFC 2396 section 5.2.
+     *
+     * @param url1 the baseURI; if null, url2 is returned unchanged
+     * @param url2 the location; returned unchanged if it carries a scheme of its own
+     * @param treatAuthorityAsBelongingToPath considers the authority to belong to the path. This
+     * special kind of URI is used in the Apache Cocoon project.
+     * @return the resolved URL
+     */
+    public static String absolutize(String url1, String url2, boolean treatAuthorityAsBelongingToPath)
+    {
+        if (url1 == null)
+            return url2;
+
+        // a location that carries its own scheme is already absolute (part 3 of the algorithm)
+        String[] url2Parts = parseUrl(url2);
+        if (url2Parts[SCHEME] != null)
+            return url2;
+        String[] url1Parts = parseUrl(url1);
+
+        if (treatAuthorityAsBelongingToPath)
+            return absolutizeWithoutAuthority(url1Parts, url2Parts);
+
+        // check if it is a reference to the current document (part 2 of the algorithm)
+        if (url2Parts[PATH].equals("") && url2Parts[QUERY] == null && url2Parts[AUTHORITY] == null)
+            return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url1Parts[PATH], url1Parts[QUERY], url2Parts[FRAGMENT]);
+
+        // it is a network reference (part 4 of the algorithm); query and fragment come from the location
+        if (url2Parts[AUTHORITY] != null)
+            return makeUrl(url1Parts[SCHEME], url2Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[FRAGMENT]);
+
+        String url1Path = url1Parts[PATH];
+        String url2Path = url2Parts[PATH];
+
+        // if the path starts with a slash (part 5 of the algorithm) it replaces the base path entirely
+        if (url2Path != null && url2Path.length() > 0 && url2Path.charAt(0) == '/')
+            return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url2Path, url2Parts[QUERY], url2Parts[FRAGMENT]);
+
+        // combine the 2 paths
+        String path = stripLastSegment(url1Path);
+        path = path + (path.endsWith("/") ? "" : "/") + url2Path;
+        path = normalize(path);
+
+        return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], path, url2Parts[QUERY], url2Parts[FRAGMENT]);
+    }
+
+    /**
+     * Absolutizes URIs whereby the authority part is considered to be a part of the path.
+     * This special kind of URI is used in the Apache Cocoon project for the cocoon and context protocols.
+     * Used internally by {@link #absolutize}. The query and fragment of both URLs are not carried over.
+     */
+    private static String absolutizeWithoutAuthority(String[] url1Parts, String[] url2Parts)
+    {
+        String authority1 = url1Parts[AUTHORITY_WITH_PRECEDING_SLASHES];
+        String authority2 = url2Parts[AUTHORITY_WITH_PRECEDING_SLASHES];
+
+        String path1 = url1Parts[PATH];
+        String path2 = url2Parts[PATH];
+
+        if (authority1 != null)
+            path1 = authority1 + path1;
+        if (authority2 != null)
+            path2 = authority2 + path2;
+
+        String path = stripLastSegment(path1);
+        path = path + (path.endsWith("/") ? "" : "/") + path2;
+        path = normalize(path);
+
+        // makeUrl leaves out a missing scheme; plain concatenation would emit the literal text "null:"
+        return makeUrl(url1Parts[SCHEME], null, path, null, null);
+    }
+
+    /** Keeps everything up to and including the last '/'; a path without any '/' is returned as is. */
+    private static String stripLastSegment(String path)
+    {
+        final int lastSlash = path.lastIndexOf('/');
+        final boolean hasSlash = lastSlash >= 0;
+        return hasSlash ? path.substring(0, lastSlash + 1) : path;
+    }
+
+    /**
+     * Removes "." segments and "<segment>/.." pairs from a path, as described in RFC 2396 section 5.2, step 6.
+     * A ".." with no removable segment before it (start of the path, or another "..") is kept as is.
+     */
+    private static String normalize(String path)
+    {
+        // steps 6c and 6d: drop every "." segment, both in the middle ("/./") and at the end ("/.")
+        int i = path.indexOf("/./");
+        while (i > -1)
+        {
+            path = path.substring(0, i + 1) + path.substring(i + 3);
+            i = path.indexOf("/./");
+        }
+        if (path.endsWith("/."))
+            path = path.substring(0, path.length() - 1);
+
+        // step 6e: drop "<segment>/../" left to right; occurrences that cannot be resolved are stepped over
+        int from = 0;
+        for (int f = path.indexOf("/../"); f > -1; f = path.indexOf("/../", from))
+        {
+            int sb = path.lastIndexOf('/', f - 1);
+            boolean removable = f > 0 && !path.substring(sb + 1, f).equals("..");
+            if (removable)
+                path = path.substring(0, sb + 1) + path.substring(f + 4);
+            from = removable ? Math.max(sb, 0) : f + 1;
+        }
+
+        // step 6f: drop a trailing "<segment>/.." unless <segment> is itself ".."
+        int end = path.length() - 3;
+        if (end > 0 && path.endsWith("/.."))
+        {
+            int sb = path.lastIndexOf('/', end - 1);
+            if (!path.substring(sb + 1, end).equals(".."))
+                path = path.substring(0, sb + 1);
+        }
+
+        return path;
+    }
+
+    /**
+     * Assembles an URL from the given URL parts, each of which may be null.
+     * A null part is left out together with its delimiter; the remaining ones are emitted in the
+     * order scheme ":" "//" authority path "?" query "#" fragment.
+     * Used internally by {@link #absolutize}.
+     *
+     * @param scheme the scheme, without the trailing colon
+     * @param authority the authority, without the leading slashes
+     * @param path the path, appended verbatim
+     * @param query the query, without the leading question mark
+     * @param fragment the fragment, without the leading hash
+     * @return the assembled URL, empty if every part is null
+     */
+    private static String makeUrl(String scheme, String authority, String path, String query, String fragment)
+    {
+        // each piece collapses to the empty string when its part is absent
+        final String schemePiece = (scheme == null) ? "" : scheme + ":";
+        final String authorityPiece = (authority == null) ? "" : "//" + authority;
+        final String pathPiece = (path == null) ? "" : path;
+        final String queryPiece = (query == null) ? "" : "?" + query;
+        final String fragmentPiece = (fragment == null) ? "" : "#" + fragment;
+
+        return schemePiece + authorityPiece + pathPiece + queryPiece + fragmentPiece;
+    }
+
+    /**
+     * Parses an URL into its individual parts.
+     *
+     * <p>The split is done with the following regular expression, which is copied
+     * literally from RFC 2396 (the digits mark where each numbered group opens):
+     * <pre>
+     * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+     *  12            3  4          5       6  7        8 9
+     * </pre>
+     *
+     * The result is an array containing 10 elements. The element 0 contains the entire matched url.
+     * The most interesting parts are:
+     * <pre>
+     * scheme = 2
+     * authority = 4
+     * path = 5
+     * query = 7
+     * fragment = 9
+     * </pre>
+     *
+     * To access these, use the predefined constants SCHEME, AUTHORITY, PATH, QUERY and FRAGMENT.
+     *
+     * <p>If a part is missing, its corresponding array entry will be null. The path part will never be
+     * null, but rather an empty string. An empty authority (as in scheme:///a) will give an empty string
+     * for the authority.
+     *
+     * @param url the url to parse. Any part of this URL may be missing (i.e. it is not obligatory that
+     * the URL contains scheme, authority, path, query and fragment parts)
+     * @return a 10-element array holding the text captured by each group of the expression
+     */
+    public static String[] parseUrl(String url) {
+        final RE matcher = new RE(urlREProgram);
+        matcher.match(url);
+        final String[] groups = new String[10];
+        for (int group = 0; group < groups.length; group++)
+            groups[group] = matcher.getParen(group);
+        return groups;
}
/**
1.3 +87 -12 avalon-excalibur/sourceresolve/src/test/org/apache/excalibur/source/test/SourceUtilTestCase.java
Index: SourceUtilTestCase.java
===================================================================
RCS file: /home/cvs/avalon-excalibur/sourceresolve/src/test/org/apache/excalibur/source/test/SourceUtilTestCase.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- SourceUtilTestCase.java 20 May 2003 20:56:43 -0000 1.2
+++ SourceUtilTestCase.java 7 Jun 2003 20:59:40 -0000 1.3
@@ -60,7 +60,7 @@
/**
* Test case for SourceUtil.
- *
+ *
* @author <a href="mailto:sylvain@apache.org">Sylvain Wallez</a>
* @version $Id$
*/
@@ -83,12 +83,12 @@
assertEquals("http", SourceUtil.getScheme(uri));
assertEquals("//foo", SourceUtil.getSpecificPart(uri));
}
-
+
public void testDoubleColon() throws Exception
{
assertEquals(4, SourceUtil.indexOfSchemeColon("file:foo:bar"));
}
-
+
public void testSpecialScheme() throws Exception
{
String uri = "a-+.:foo"; // Strange, but valid !
@@ -96,7 +96,7 @@
assertEquals("a-+.", SourceUtil.getScheme(uri));
assertEquals("foo", SourceUtil.getSpecificPart(uri));
}
-
+
public void testSpecialPart() throws Exception
{
String uri = "bar:";
@@ -104,31 +104,106 @@
assertEquals("bar", SourceUtil.getScheme(uri));
assertEquals("", SourceUtil.getSpecificPart(uri));
}
-
+
public void testInvalidScheme() throws Exception
{
String uri = "2foo:bar";
assertEquals(-1, SourceUtil.indexOfSchemeColon(uri));
assertEquals(null, SourceUtil.getScheme(uri));
assertEquals(null, SourceUtil.getSpecificPart(uri));
-
+
// Invalid character before any of the allowed ones
assertEquals(-1, SourceUtil.indexOfSchemeColon("h ttp:foo"));
assertEquals(-1, SourceUtil.indexOfSchemeColon(" http:foo"));
assertEquals(-1, SourceUtil.indexOfSchemeColon("http :foo"));
-
+
// Invalid character between allowed ranges
assertEquals(-1, SourceUtil.indexOfSchemeColon("h_ttp:foo"));
assertEquals(-1, SourceUtil.indexOfSchemeColon("_http:foo"));
assertEquals(-1, SourceUtil.indexOfSchemeColon("http_:foo"));
-
+
// Invalid character after any of the allowed ones
assertEquals(-1, SourceUtil.indexOfSchemeColon("h~ttp:foo"));
assertEquals(-1, SourceUtil.indexOfSchemeColon("~http:foo"));
assertEquals(-1, SourceUtil.indexOfSchemeColon("http~:foo"));
+ }
+
+ public void testAbsolutize()
+ {
+ String base = "http://a/b/c/d;p?q";
+
+ //
+ // Test examples from RFC 2396
+ //
+
+ // normal cases
+ assertEquals("g:h", SourceUtil.absolutize(base, "g:h"));
+ assertEquals("http://a/b/c/g", SourceUtil.absolutize(base, "g"));
+ assertEquals("http://a/b/c/g", SourceUtil.absolutize(base, "./g"));
+ assertEquals("http://a/b/c/g/", SourceUtil.absolutize(base, "g/"));
+ assertEquals("http://a/g", SourceUtil.absolutize(base, "/g"));
+ assertEquals("http://g", SourceUtil.absolutize(base, "//g"));
+ assertEquals("http://a/b/c/?y", SourceUtil.absolutize(base, "?y"));
+ assertEquals("http://a/b/c/g?y", SourceUtil.absolutize(base, "g?y"));
+ assertEquals("http://a/b/c/d;p?q#s", SourceUtil.absolutize(base, "#s"));
+ assertEquals("http://a/b/c/g#s", SourceUtil.absolutize(base, "g#s"));
+ assertEquals("http://a/b/c/g?y#s", SourceUtil.absolutize(base, "g?y#s"));
+ assertEquals("http://a/b/c/;x", SourceUtil.absolutize(base, ";x"));
+ assertEquals("http://a/b/c/g;x", SourceUtil.absolutize(base, "g;x"));
+ assertEquals("http://a/b/c/g;x?y#s", SourceUtil.absolutize(base, "g;x?y#s"));
+ assertEquals("http://a/b/c/", SourceUtil.absolutize(base, "."));
+ assertEquals("http://a/b/c/", SourceUtil.absolutize(base, "./"));
+ assertEquals("http://a/b/", SourceUtil.absolutize(base, ".."));
+ assertEquals("http://a/b/", SourceUtil.absolutize(base, "../"));
+ assertEquals("http://a/b/g", SourceUtil.absolutize(base, "../g"));
+ assertEquals("http://a/", SourceUtil.absolutize(base, "../.."));
+ assertEquals("http://a/", SourceUtil.absolutize(base, "../../"));
+ assertEquals("http://a/g", SourceUtil.absolutize(base, "../../g"));
+
+ // abnormal cases
+ assertEquals("http://a/../g", SourceUtil.absolutize(base, "../../../g"));
+ assertEquals("http://a/../../g", SourceUtil.absolutize(base, "../../../../g"));
+
+ assertEquals("http://a/./g", SourceUtil.absolutize(base, "/./g"));
+ assertEquals("http://a/../g", SourceUtil.absolutize(base, "/../g"));
+ assertEquals("http://a/b/c/g.", SourceUtil.absolutize(base, "g."));
+ assertEquals("http://a/b/c/.g", SourceUtil.absolutize(base, ".g"));
+ assertEquals("http://a/b/c/g..", SourceUtil.absolutize(base, "g.."));
+ assertEquals("http://a/b/c/..g", SourceUtil.absolutize(base, "..g"));
+
+ assertEquals("http://a/b/g", SourceUtil.absolutize(base, "./../g"));
+ assertEquals("http://a/b/c/g/", SourceUtil.absolutize(base, "./g/."));
+ assertEquals("http://a/b/c/g/h", SourceUtil.absolutize(base, "g/./h"));
+ assertEquals("http://a/b/c/h", SourceUtil.absolutize(base, "g/../h"));
+ assertEquals("http://a/b/c/g;x=1/y", SourceUtil.absolutize(base, "g;x=1/./y"));
+ assertEquals("http://a/b/c/y", SourceUtil.absolutize(base, "g;x=1/../y"));
+
+ assertEquals("http://a/b/c/g?y/./x", SourceUtil.absolutize(base, "g?y/./x"));
+ assertEquals("http://a/b/c/g?y/../x", SourceUtil.absolutize(base, "g?y/../x"));
+ assertEquals("http://a/b/c/g#s/./x", SourceUtil.absolutize(base, "g#s/./x"));
+ assertEquals("http://a/b/c/g#s/../x", SourceUtil.absolutize(base, "g#s/../x"));
+
+ //
+ // other tests
+ //
+
+ // if there's a scheme, url is absolute
+ assertEquals("http://a", SourceUtil.absolutize("", "http://a"));
+ assertEquals("cocoon:/a", SourceUtil.absolutize("", "cocoon:/a", true));
+
+ // handle null base
+ assertEquals("a", SourceUtil.absolutize(null, "a"));
+
+ // handle network reference
+ assertEquals("http://a/b", SourceUtil.absolutize("http://myhost", "//a/b"));
+
+ // handle empty authority
+ assertEquals("http:///a/b", SourceUtil.absolutize("http:///a/", "b"));
- assertEquals(-1, SourceUtil.indexOfSchemeColon("/file/with:colon"));
- assertEquals(-1, SourceUtil.indexOfSchemeColon(".foo:bar"));
- assertEquals(-1, SourceUtil.indexOfSchemeColon("no-colon"));
+ // cocoon and context protocols
+ assertEquals("cocoon://a/b/c", SourceUtil.absolutize("cocoon://a/b/", "c", true));
+ assertEquals("cocoon:/a/b/c", SourceUtil.absolutize("cocoon:/a/b/", "c", true));
+ assertEquals("cocoon://c", SourceUtil.absolutize("cocoon://a", "c", true));
+ assertEquals("cocoon://c", SourceUtil.absolutize("cocoon://a/b/", "../../c", true));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: cvs-unsubscribe@avalon.apache.org
For additional commands, e-mail: cvs-help@avalon.apache.org