You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@avalon.apache.org by br...@apache.org on 2003/06/07 22:59:40 UTC

cvs commit: avalon-excalibur/sourceresolve/src/test/org/apache/excalibur/source/test SourceUtilTestCase.java

bruno       2003/06/07 13:59:40

  Modified:    sourceresolve/src/java/org/apache/excalibur/source
                        SourceUtil.java
               sourceresolve/src/test/org/apache/excalibur/source/test
                        SourceUtilTestCase.java
  Log:
  Added a few methods to SourceUtil to "absolutize" URLs (following RFC 2396).
  
  Revision  Changes    Path
  1.9       +243 -25   avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source/SourceUtil.java
  
  Index: SourceUtil.java
  ===================================================================
  RCS file: /home/cvs/avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source/SourceUtil.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- SourceUtil.java	7 Jun 2003 18:28:36 -0000	1.8
  +++ SourceUtil.java	7 Jun 2003 20:59:40 -0000	1.9
  @@ -59,6 +59,11 @@
   import java.util.Iterator;
   
   import org.apache.avalon.framework.parameters.Parameters;
  +import org.apache.avalon.framework.CascadingRuntimeException;
  +import org.apache.regexp.REProgram;
  +import org.apache.regexp.RECompiler;
  +import org.apache.regexp.RESyntaxException;
  +import org.apache.regexp.RE;
   
   /**
    *
  @@ -322,7 +327,7 @@
           }
           return null;
       }
  -    
  +
       /**
        * Move the source to a specified destination.
        *
  @@ -333,25 +338,25 @@
        *                         the move.
        */
       static public void move(Source source,
  -                              Source destination) 
  -    throws SourceException 
  +                              Source destination)
  +    throws SourceException
       {
           if (source instanceof MoveableSource
               && source.getClass().equals(destination.getClass()))
           {
               ((MoveableSource)source).moveTo(destination);
  -        } 
  -        else if (source instanceof ModifiableSource) 
  +        }
  +        else if (source instanceof ModifiableSource)
           {
               copy(source, destination);
               ((ModifiableSource) source).delete();
  -        } 
  -        else 
  +        }
  +        else
           {
               throw new SourceException("Source '"+source.getURI()+ "' is not writeable");
           }
       }
  -    
  +
       /**
        * Get the position of the scheme-delimiting colon in an absolute URI, as specified
        * by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, appendix A. This method is
  @@ -361,7 +366,7 @@
        * Use this method when you need both the scheme and the scheme-specific part of an URI,
        * as calling successively {@link #getScheme(String)} and {@link #getSpecificPart(String)}
        * will call this method twice, and as such won't be efficient.
  -     * 
  +     *
        * @param uri the URI
        * @return int the scheme-delimiting colon, or <code>-1</code> if not found.
        */
  @@ -389,7 +394,7 @@
           {
               return -1;
           }
  -        
  +
           // Check that first character is alpha
           // (lowercase first since it's the most common case)
           char ch = uri.charAt(0);
  @@ -399,7 +404,7 @@
               // Invalid first character
               return -1;
           }
  -        
  +
           int pos = uri.indexOf(':');
           if (pos != -1)
           {
  @@ -417,13 +422,13 @@
                   }
               }
           }
  -        
  +
           return pos;
       }
  -    
  +
       /**
        * Get the scheme of an absolute URI.
  -     * 
  +     *
        * @param uri the absolute URI
        * @return the URI scheme
        */
  @@ -432,12 +437,12 @@
           int pos = indexOfSchemeColon(uri);
           return (pos == -1) ? null : uri.substring(0, pos);
       }
  -    
  +
       /**
        * Get the scheme-specific part of an absolute URI. Note that this includes everything
        * after the separating colon, including the fragment, if any (RFC 2396 separates it
        * from the scheme-specific part).
  -     * 
  +     *
        * @param uri the absolute URI
        * @return the scheme-specific part of the URI
        */
  @@ -457,14 +462,14 @@
        *                         the copy.
        */
       static public void copy(Source source,
  -                            Source destination) 
  +                            Source destination)
       throws SourceException {
  -        if (source instanceof MoveableSource 
  -            && source.getClass().equals(destination.getClass())) 
  +        if (source instanceof MoveableSource
  +            && source.getClass().equals(destination.getClass()))
           {
               ((MoveableSource) source).copyTo(destination);
  -        } 
  -        else 
  +        }
  +        else
           {
               if ( !(destination instanceof ModifiableSource)) {
                   throw new SourceException("Source '"+
  @@ -475,7 +480,7 @@
               try {
                   OutputStream out = ((ModifiableSource) destination).getOutputStream();
                   InputStream in = source.getInputStream();
  -                
  +
                   copy(in, out);
               } catch (IOException ioe) {
                   throw new SourceException("Could not copy source '"+
  @@ -485,10 +490,10 @@
               }
           }
       }
  -    
  +
       /**
        * Copy the contents of an <code>InputStream</code> to an <code>OutputStream</code>.
  -     * 
  +     *
        * @param in
        * @param out
        * @throws IOException
  @@ -504,6 +509,219 @@
           in.close();
           out.flush();
           out.close();
  +    }
  +
  +    private static final String urlregexp = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
  +    private static final REProgram urlREProgram; // urlregexp in compiled form, assigned once by the static block below
  +    static
  +    {
  +        RECompiler compiler = new RECompiler();
  +        try
  +        {
  +            urlREProgram = compiler.compile(urlregexp);
  +        }
  +        catch (RESyntaxException e) // rethrown unchecked, with e handed to the new exception
  +        {
  +            throw new CascadingRuntimeException("SourceUtil: could not compile urlregexp pattern", e);
  +        }
  +    }
  +
  +    public static final int SCHEME = 2; // group 2 of urlregexp, also the index in the parseUrl result
  +    private static final int AUTHORITY_WITH_PRECEDING_SLASHES = 3; // group 3: the authority including its leading "//"
  +    public static final int AUTHORITY = 4; // group 4: the authority without the leading "//"
  +    public static final int PATH = 5; // group 5
  +    public static final int QUERY = 7; // group 7: the query without the leading "?"
  +    public static final int FRAGMENT = 9; // group 9: the fragment without the leading "#"
  +
  +    /** Shorthand for {@link #absolutize(String, String, boolean)}
  +     *  with the authority treated as a regular authority, not as a part of the path. */
  +    public static String absolutize(String url1, String url2)
  +    {
  +        final boolean authorityIsPartOfPath = false;
  +        return absolutize(url1, url2, authorityIsPartOfPath);
  +    }
  +
  +    /**
  +     * Applies a location to a baseURI. This is done as described in RFC 2396 section 5.2.
  +     * @param url1 the baseURI; when it is null, url2 is returned unchanged
  +     * @param url2 the location; returned unchanged when it contains a scheme of its own
  +     * @param treatAuthorityAsBelongingToPath considers the authority to belong to the path. This
  +     * special kind of URI is used in the Apache Cocoon project.
  +     * @return the location, made absolute against the baseURI
  +     */
  +    public static String absolutize(String url1, String url2, boolean treatAuthorityAsBelongingToPath)
  +    {
  +        if (url1 == null)
  +            return url2;
  +
  +        // parse the urls into parts
  +        // if the second url contains a scheme, it is not relative so return it right away (part 3 of the algorithm)
  +        String[] url2Parts = parseUrl(url2);
  +        if (url2Parts[SCHEME] != null)
  +            return url2;
  +        String[] url1Parts = parseUrl(url1);
  +
  +        if (treatAuthorityAsBelongingToPath)
  +            return absolutizeWithoutAuthority(url1Parts, url2Parts);
  +
  +        // check if it is a reference to the current document (part 2 of the algorithm)
  +        if (url2Parts[PATH].equals("") && url2Parts[QUERY] == null && url2Parts[AUTHORITY] == null)
  +            return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url1Parts[PATH], url1Parts[QUERY], url2Parts[FRAGMENT]);
  +
  +        // it is a network reference (part 4 of the algorithm): only the scheme is taken from the base
  +        if (url2Parts[AUTHORITY] != null)
  +            return makeUrl(url1Parts[SCHEME], url2Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[FRAGMENT]);
  +
  +        String url1Path = url1Parts[PATH];
  +        String url2Path = url2Parts[PATH];
  +
  +        // if the path starts with a slash (part 5 of the algorithm), it replaces the base path as a whole
  +        if (url2Path != null && url2Path.length() > 0 && url2Path.charAt(0) == '/')
  +            return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[FRAGMENT]);
  +
  +        // combine the 2 paths
  +        String path = stripLastSegment(url1Path);
  +        path = path + (path.endsWith("/") ? "" : "/") + url2Path;
  +        path = normalize(path);
  +
  +        return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], path, url2Parts[QUERY], url2Parts[FRAGMENT]);
  +    }
  +
  +    /**
  +     * Absolutizes URIs whereby the authority part is considered to be a part of the path.
  +     * This special kind of URIs is used in the Apache Cocoon project for the cocoon and context protocols.
  +     * This method is internally used by {@link #absolutize}. Query and fragment parts are not carried over.
  +     */
  +    private static String absolutizeWithoutAuthority(String[] url1Parts, String[] url2Parts)
  +    {
  +        String authority1 = url1Parts[AUTHORITY_WITH_PRECEDING_SLASHES];
  +        String authority2 = url2Parts[AUTHORITY_WITH_PRECEDING_SLASHES];
  +
  +        String path1 = url1Parts[PATH];
  +        String path2 = url2Parts[PATH];
  +
  +        if (authority1 != null)
  +            path1 = authority1 + path1;
  +        if (authority2 != null)
  +            path2 = authority2 + path2;
  +
  +        String path = stripLastSegment(path1);
  +        path = path + (path.endsWith("/") ? "" : "/") + path2;
  +        path = normalize(path);
  +        // a base without a scheme yields just the path instead of a literal "null:" prefix
  +        String scheme = url1Parts[SCHEME];
  +        return (scheme == null) ? path : scheme + ":" + path;
  +    }
  +
  +    /** Cuts everything after the last slash of path; a path without any slash is returned unchanged. */
  +    private static String stripLastSegment(String path)
  +    {
  +        final int lastSlash = path.lastIndexOf('/');
  +        // the slash itself stays in the result
  +        return (lastSlash < 0) ? path : path.substring(0, lastSlash + 1);
  +    }
  +
  +    /**
  +     * Removes things like &lt;segment&gt;/../ or ./, as described in RFC 2396 in
  +     * step 6 of section 5.2. A ".." segment is never removed as if it were an ordinary segment.
  +     */
  +    private static String normalize(String path)
  +    {
  +        // replace all /./ with /
  +        int i = path.indexOf("/./");
  +        while (i > -1)
  +        {
  +            path = path.substring(0, i + 1) + path.substring(i + 3);
  +            i = path.indexOf("/./");
  +        }
  +
  +        if (path.endsWith("/."))
  +            path = path.substring(0, path.length() - 1);
  +        // collapse <segment>/../ ; sb is -1 when the segment starts the path, which the substrings handle
  +        int f = path.indexOf("/../");
  +        while (f > 0)
  +        {
  +            int sb = path.lastIndexOf("/", f - 1);
  +            boolean parentRef = path.substring(sb + 1, f).equals("..");
  +            if (!parentRef)
  +                path = path.substring(0, sb + 1) + path.substring(f + 4);
  +            // after "../../" nothing was removed, so search past f, otherwise the loop would never end
  +            f = parentRef ? path.indexOf("/../", f + 1) : path.indexOf("/../");
  +        }
  +        // same treatment for a trailing <segment>/..
  +        if (path.length() > 3 && path.endsWith("/.."))
  +        {
  +            int sb = path.lastIndexOf("/", path.length() - 4);
  +            String segment = path.substring(sb + 1, path.length() - 3);
  +            if (!segment.equals(".."))
  +                path = path.substring(0, sb + 1);
  +        }
  +
  +        return path;
  +    }
  +
  +    /**
  +     * Glues URL parts back together into one string. Every part may be null, in which case
  +     * neither the part nor its delimiter shows up in the result.
  +     * Used internally by {@link #absolutize}.
  +     *
  +     * @param scheme written as scheme followed by a colon
  +     * @param authority written as two slashes followed by authority
  +     * @param path written as is
  +     * @param query written as a question mark followed by query
  +     * @param fragment written as a hash sign followed by fragment
  +     * @return the assembled URL, the empty string when all parts are null
  +     */
  +    private static String makeUrl(String scheme, String authority, String path, String query, String fragment)
  +    {
  +        // an absent part collapses to the empty string, so the concatenation needs no further checks
  +        String schemePart = (scheme == null) ? "" : scheme + ":";
  +        String authorityPart = (authority == null) ? "" : "//" + authority;
  +        String pathPart = (path == null) ? "" : path;
  +        String queryPart = (query == null) ? "" : "?" + query;
  +        String fragmentPart = (fragment == null) ? "" : "#" + fragment;
  +
  +        StringBuffer url = new StringBuffer(schemePart).append(authorityPart).append(pathPart);
  +        return url.append(queryPart).append(fragmentPart).toString();
  +    }
  +
  +    /**
  +     * Splits an URL into its components.
  +     *
  +     * <p>The split is performed with the regular expression given in RFC 2396:
  +     * <pre>
  +     * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  +     *  12            3  4          5       6  7        8 9
  +     * </pre>
  +     *
  +     * <p>The groups of interest, whose numbers double as indexes into the result, are:
  +     * <pre>
  +     * scheme    = 2
  +     * authority = 4
  +     * path      = 5
  +     * query     = 7
  +     * fragment  = 9
  +     * </pre>
  +     *
  +     * The predefined constants SCHEME, AUTHORITY, PATH, QUERY and FRAGMENT hold these indexes.
  +     *
  +     * <p>If a part is missing, its corresponding array entry will be null. The path-part will never be
  +     * null, but rather an empty string. An empty authority (as in scheme:///a) will give an empty string
  +     * for the authority.
  +     *
  +     * @param url the url to parse. Any part from this URL may be missing (i.e. it is not obligatory that
  +     * the URL contains scheme, authority, path, query and fragment parts)
  +     * @return an array of 10 elements; element 0 is the entire matched url and element i holds
  +     * what group i of the expression matched
  +     *
  +     */
  +    public static String[] parseUrl(String url) {
  +        final RE matcher = new RE(urlREProgram);
  +        matcher.match(url);
  +        final String[] groups = new String[10];
  +        for (int group = 0; group < groups.length; group++)
  +            groups[group] = matcher.getParen(group);
  +        return groups;
       }
   
       /**
  
  
  
  1.3       +87 -12    avalon-excalibur/sourceresolve/src/test/org/apache/excalibur/source/test/SourceUtilTestCase.java
  
  Index: SourceUtilTestCase.java
  ===================================================================
  RCS file: /home/cvs/avalon-excalibur/sourceresolve/src/test/org/apache/excalibur/source/test/SourceUtilTestCase.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- SourceUtilTestCase.java	20 May 2003 20:56:43 -0000	1.2
  +++ SourceUtilTestCase.java	7 Jun 2003 20:59:40 -0000	1.3
  @@ -60,7 +60,7 @@
   
   /**
    * Test case for SourceUtil.
  - * 
  + *
    * @author <a href="mailto:sylvain@apache.org">Sylvain Wallez</a>
    * @version $Id$
    */
  @@ -83,12 +83,12 @@
           assertEquals("http", SourceUtil.getScheme(uri));
           assertEquals("//foo", SourceUtil.getSpecificPart(uri));
       }
  -    
  +
       public void testDoubleColon() throws Exception
       {
           assertEquals(4, SourceUtil.indexOfSchemeColon("file:foo:bar"));
       }
  -    
  +
       public void testSpecialScheme() throws Exception
       {
           String uri = "a-+.:foo"; // Strange, but valid !
  @@ -96,7 +96,7 @@
           assertEquals("a-+.", SourceUtil.getScheme(uri));
           assertEquals("foo", SourceUtil.getSpecificPart(uri));
       }
  -    
  +
       public void testSpecialPart() throws Exception
       {
           String uri = "bar:";
  @@ -104,31 +104,106 @@
           assertEquals("bar", SourceUtil.getScheme(uri));
           assertEquals("", SourceUtil.getSpecificPart(uri));
       }
  -    
  +
       public void testInvalidScheme() throws Exception
       {
           String uri = "2foo:bar";
           assertEquals(-1, SourceUtil.indexOfSchemeColon(uri));
           assertEquals(null, SourceUtil.getScheme(uri));
           assertEquals(null, SourceUtil.getSpecificPart(uri));
  -        
  +
           // Invalid character before any of the allowed ones
           assertEquals(-1, SourceUtil.indexOfSchemeColon("h ttp:foo"));
           assertEquals(-1, SourceUtil.indexOfSchemeColon(" http:foo"));
           assertEquals(-1, SourceUtil.indexOfSchemeColon("http :foo"));
  -        
  +
          // Invalid character between allowed ranges
           assertEquals(-1, SourceUtil.indexOfSchemeColon("h_ttp:foo"));
           assertEquals(-1, SourceUtil.indexOfSchemeColon("_http:foo"));
           assertEquals(-1, SourceUtil.indexOfSchemeColon("http_:foo"));
  -        
  +
           // Invalid character after any of the allowed ones
           assertEquals(-1, SourceUtil.indexOfSchemeColon("h~ttp:foo"));
           assertEquals(-1, SourceUtil.indexOfSchemeColon("~http:foo"));
           assertEquals(-1, SourceUtil.indexOfSchemeColon("http~:foo"));
  +	}
  +
  +    public void testAbsolutize()
  +    {
  +        String base = "http://a/b/c/d;p?q";
  +
  +        //
  +        // Test examples from RFC 2396, appendix C (resolved against the base URI above)
  +        //
  +
  +        // normal cases (appendix C.1)
  +        assertEquals("g:h", SourceUtil.absolutize(base, "g:h"));
  +        assertEquals("http://a/b/c/g", SourceUtil.absolutize(base, "g"));
  +        assertEquals("http://a/b/c/g", SourceUtil.absolutize(base, "./g"));
  +        assertEquals("http://a/b/c/g/", SourceUtil.absolutize(base, "g/"));
  +        assertEquals("http://a/g", SourceUtil.absolutize(base, "/g"));
  +        assertEquals("http://g", SourceUtil.absolutize(base, "//g"));
  +        assertEquals("http://a/b/c/?y", SourceUtil.absolutize(base, "?y"));
  +        assertEquals("http://a/b/c/g?y", SourceUtil.absolutize(base, "g?y"));
  +        assertEquals("http://a/b/c/d;p?q#s", SourceUtil.absolutize(base, "#s"));
  +        assertEquals("http://a/b/c/g#s", SourceUtil.absolutize(base, "g#s"));
  +        assertEquals("http://a/b/c/g?y#s", SourceUtil.absolutize(base, "g?y#s"));
  +        assertEquals("http://a/b/c/;x", SourceUtil.absolutize(base, ";x"));
  +        assertEquals("http://a/b/c/g;x", SourceUtil.absolutize(base, "g;x"));
  +        assertEquals("http://a/b/c/g;x?y#s", SourceUtil.absolutize(base, "g;x?y#s"));
  +        assertEquals("http://a/b/c/", SourceUtil.absolutize(base, "."));
  +        assertEquals("http://a/b/c/", SourceUtil.absolutize(base, "./"));
  +        assertEquals("http://a/b/", SourceUtil.absolutize(base, ".."));
  +        assertEquals("http://a/b/", SourceUtil.absolutize(base, "../"));
  +        assertEquals("http://a/b/g", SourceUtil.absolutize(base, "../g"));
  +        assertEquals("http://a/", SourceUtil.absolutize(base, "../.."));
  +        assertEquals("http://a/", SourceUtil.absolutize(base, "../../"));
  +        assertEquals("http://a/g", SourceUtil.absolutize(base, "../../g"));
  +
  +        // abnormal cases (appendix C.2)
  +        assertEquals("http://a/../g", SourceUtil.absolutize(base, "../../../g"));
  +        assertEquals("http://a/../../g", SourceUtil.absolutize(base, "../../../../g"));
  +
  +        assertEquals("http://a/./g", SourceUtil.absolutize(base, "/./g"));
  +        assertEquals("http://a/../g", SourceUtil.absolutize(base, "/../g"));
  +        assertEquals("http://a/b/c/g.", SourceUtil.absolutize(base, "g."));
  +        assertEquals("http://a/b/c/.g", SourceUtil.absolutize(base, ".g"));
  +        assertEquals("http://a/b/c/g..", SourceUtil.absolutize(base, "g.."));
  +        assertEquals("http://a/b/c/..g", SourceUtil.absolutize(base, "..g"));
  +
  +        assertEquals("http://a/b/g", SourceUtil.absolutize(base, "./../g"));
  +        assertEquals("http://a/b/c/g/", SourceUtil.absolutize(base, "./g/."));
  +        assertEquals("http://a/b/c/g/h", SourceUtil.absolutize(base, "g/./h"));
  +        assertEquals("http://a/b/c/h", SourceUtil.absolutize(base, "g/../h"));
  +        assertEquals("http://a/b/c/g;x=1/y", SourceUtil.absolutize(base, "g;x=1/./y"));
  +        assertEquals("http://a/b/c/y", SourceUtil.absolutize(base, "g;x=1/../y"));
  +
  +        assertEquals("http://a/b/c/g?y/./x", SourceUtil.absolutize(base, "g?y/./x"));
  +        assertEquals("http://a/b/c/g?y/../x", SourceUtil.absolutize(base, "g?y/../x"));
  +        assertEquals("http://a/b/c/g#s/./x", SourceUtil.absolutize(base, "g#s/./x"));
  +        assertEquals("http://a/b/c/g#s/../x", SourceUtil.absolutize(base, "g#s/../x"));
  +
  +        //
  +        // other tests
  +        //
  +
  +        // if there's a scheme, url is absolute and comes back unchanged, with or without the flag
  +        assertEquals("http://a", SourceUtil.absolutize("", "http://a"));
  +        assertEquals("cocoon:/a", SourceUtil.absolutize("", "cocoon:/a", true));
  +
  +        // handle null base: the location comes back unchanged
  +        assertEquals("a", SourceUtil.absolutize(null, "a"));
  +
  +        // handle network reference: only the scheme of the base is kept
  +        assertEquals("http://a/b", SourceUtil.absolutize("http://myhost", "//a/b"));
  +
  +        // handle empty authority
  +        assertEquals("http:///a/b", SourceUtil.absolutize("http:///a/", "b"));
   
  -        assertEquals(-1, SourceUtil.indexOfSchemeColon("/file/with:colon"));
  -        assertEquals(-1, SourceUtil.indexOfSchemeColon(".foo:bar"));
  -        assertEquals(-1, SourceUtil.indexOfSchemeColon("no-colon"));
  +        // cocoon and context protocols: third argument true makes the authority a part of the path
  +        assertEquals("cocoon://a/b/c", SourceUtil.absolutize("cocoon://a/b/", "c", true));
  +        assertEquals("cocoon:/a/b/c", SourceUtil.absolutize("cocoon:/a/b/", "c", true));
  +        assertEquals("cocoon://c", SourceUtil.absolutize("cocoon://a", "c", true));
  +        assertEquals("cocoon://c", SourceUtil.absolutize("cocoon://a/b/", "../../c", true));
       }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: cvs-unsubscribe@avalon.apache.org
For additional commands, e-mail: cvs-help@avalon.apache.org