You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hc.apache.org by se...@apache.org on 2012/06/27 01:23:22 UTC

svn commit: r1354288 - in /httpcomponents/httpclient/trunk/httpclient/src: main/java/org/apache/http/client/utils/URIBuilder.java main/java/org/apache/http/client/utils/URLEncodedUtils.java test/java/org/apache/http/client/utils/TestURIBuilder.java

Author: sebb
Date: Tue Jun 26 23:23:21 2012
New Revision: 1354288

URL: http://svn.apache.org/viewvc?rev=1354288&view=rev
Log:
HTTPCLIENT-1195 - URIBuilder-created query strings are double-escaped
Add [ ] to reserved chars
Use separate safe sets for userinfo and fragment

Modified:
    httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java
    httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java
    httpcomponents/httpclient/trunk/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java

Modified: httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java
URL: http://svn.apache.org/viewvc/httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java?rev=1354288&r1=1354287&r2=1354288&view=diff
==============================================================================
--- httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java (original)
+++ httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java Tue Jun 26 23:23:21 2012
@@ -165,7 +165,7 @@ public class URIBuilder {
     }
 
     private String encodeUserInfo(final String userInfo) {
-        return URLEncodedUtils.enc(userInfo, Consts.UTF_8);
+        return URLEncodedUtils.encUserInfo(userInfo, Consts.UTF_8);
     }
 
     private String encodePath(final String path) {
@@ -177,7 +177,7 @@ public class URIBuilder {
     }
 
     private String encodeFragment(final String fragment) {
-        return URLEncodedUtils.enc(fragment, Consts.UTF_8);
+        return URLEncodedUtils.encFragment(fragment, Consts.UTF_8);
     }
 
     /**

Modified: httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java
URL: http://svn.apache.org/viewvc/httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java?rev=1354288&r1=1354287&r2=1354288&view=diff
==============================================================================
--- httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java (original)
+++ httpcomponents/httpclient/trunk/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java Tue Jun 26 23:23:21 2012
@@ -261,17 +261,35 @@ public class URLEncodedUtils {
     /** 
      * Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *}
      * <p>
-     *  This list is the same as the {@code unreserved} list in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</>
+     *  This list is the same as the {@code unreserved} list in
+     *  <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
      */
     private static final BitSet UNRESERVED   = new BitSet(256);
-    /** Punctuation characters: , ; : $ & + = */
+    /**
+     * Punctuation characters: , ; : $ & + =
+     * <p>
+     * These are the additional characters allowed by userinfo.
+     */
     private static final BitSet PUNCT        = new BitSet(256);
-    /** Characters which are safe to use, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */
-    private static final BitSet SAFE         = new BitSet(256);
+    /** Characters which are safe to use in userinfo, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */
+    private static final BitSet USERINFO     = new BitSet(256);
     /** Characters which are safe to use in a path, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation plus / @ */
     private static final BitSet PATHSAFE     = new BitSet(256);
+    /** Characters which are safe to use in a fragment, i.e. {@link #RESERVED} plus {@link #UNRESERVED} */
+    private static final BitSet FRAGMENT     = new BitSet(256);
 
     /** 
+     * Reserved characters, i.e. {@code ;/?:@&=+$,[]}
+     * <p>
+     *  This list is the same as the {@code reserved} list in 
+     *  <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
+     *  as augmented by
+     *  <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>
+     */
+    private static final BitSet RESERVED     = new BitSet(256);
+
+    
+    /** 
      * Safe characters for x-www-form-urlencoded data, as per java.net.URLEncoder and browser behaviour,
      * i.e. alphanumeric plus {@code "-", "_", ".", "*"}
      */
@@ -290,7 +308,7 @@ public class URLEncodedUtils {
         for (int i = '0'; i <= '9'; i++) {
             UNRESERVED.set(i);
         }
-        UNRESERVED.set('_');
+        UNRESERVED.set('_'); // these are the characters of the "mark" list
         UNRESERVED.set('-');
         UNRESERVED.set('.');
         UNRESERVED.set('*');
@@ -308,14 +326,37 @@ public class URLEncodedUtils {
         PUNCT.set('&');
         PUNCT.set('+');
         PUNCT.set('=');
-        // URL path safe
-        SAFE.or(UNRESERVED);
-        SAFE.or(PUNCT);
+        // Safe for userinfo
+        USERINFO.or(UNRESERVED);
+        USERINFO.or(PUNCT);
+
         // URL path safe
         PATHSAFE.or(UNRESERVED);
-        PATHSAFE.or(PUNCT);
-        PATHSAFE.set('/');
+        PATHSAFE.set('/'); // segment separator
+        PATHSAFE.set(';'); // param separator
+        PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ ,
         PATHSAFE.set('@');
+        PATHSAFE.set('&');
+        PATHSAFE.set('=');
+        PATHSAFE.set('+');
+        PATHSAFE.set('$');
+        PATHSAFE.set(',');
+        
+        RESERVED.set(';');
+        RESERVED.set('/');
+        RESERVED.set('?');
+        RESERVED.set(':');
+        RESERVED.set('@');
+        RESERVED.set('&');
+        RESERVED.set('=');
+        RESERVED.set('+');
+        RESERVED.set('$');
+        RESERVED.set(',');
+        RESERVED.set('['); // added by RFC 2732
+        RESERVED.set(']'); // added by RFC 2732
+        
+        FRAGMENT.or(RESERVED);
+        FRAGMENT.or(UNRESERVED);
     }
 
     private static final int RADIX = 16;
@@ -462,16 +503,29 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Encode a String using the {@link #SAFE} set of characters.
+     * Encode a String using the {@link #USERINFO} set of characters.
+     * <p>
+     * Used by URIBuilder to encode the userinfo segment.
+     * 
+     * @param content the string to encode, does not convert space to '+'
+     * @param charset the charset to use
+     * @return the encoded string
+     */
+    static String encUserInfo(final String content, final Charset charset) {
+        return urlencode(content, charset, USERINFO, false);
+    }
+
+    /**
+     * Encode a String using the {@link #FRAGMENT} set of characters.
      * <p>
-     * Used by URIBuilder to encode userinfo and fragment segments.
+     * Used by URIBuilder to encode the fragment segment.
      * 
      * @param content the string to encode, does not convert space to '+'
      * @param charset the charset to use
      * @return the encoded string
      */
-    static String enc(final String content, final Charset charset) {
-        return urlencode(content, charset, SAFE, false);
+    static String encFragment(final String content, final Charset charset) {
+        return urlencode(content, charset, FRAGMENT, false);
     }
 
     /**

Modified: httpcomponents/httpclient/trunk/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java
URL: http://svn.apache.org/viewvc/httpcomponents/httpclient/trunk/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java?rev=1354288&r1=1354287&r2=1354288&view=diff
==============================================================================
--- httpcomponents/httpclient/trunk/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java (original)
+++ httpcomponents/httpclient/trunk/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java Tue Jun 26 23:23:21 2012
@@ -193,4 +193,36 @@ public class TestURIBuilder {
 
     }
 
+    @Test
+    public void testAgainstURIEncoded() throws Exception {
+        // Check that the encoded URI generated by URI builder agrees with that generated by using URI directly
+        final String scheme="https";
+        final String host="localhost";
+        final String specials="/ abcd!$&*()_-+.,=:;'~<>/@[]|#^%\"{}\\`xyz"; // N.B. excludes £¬¦
+        final String formdatasafe = "abcd-_.*zyz";
+        URI uri = new URI(scheme, specials, host, 80, specials,
+                          formdatasafe, // TODO replace with specials when supported
+                          specials);
+
+        URI bld = new URIBuilder()
+                .setScheme(scheme)
+                .setHost(host)
+                .setUserInfo(specials)
+                .setPath(specials)
+                .addParameter(formdatasafe, null) // TODO replace with specials when supported
+                .setFragment(specials)
+                .build();
+
+        Assert.assertEquals(uri.getHost(), bld.getHost());
+        
+        Assert.assertEquals(uri.getRawUserInfo(), bld.getRawUserInfo());
+        
+        Assert.assertEquals(uri.getRawPath(), bld.getRawPath());
+
+        Assert.assertEquals(uri.getRawQuery(), bld.getRawQuery());
+
+        Assert.assertEquals(uri.getRawFragment(), bld.getRawFragment());
+
+    }
+
 }