You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by mb...@apache.org on 2003/07/15 14:46:33 UTC

cvs commit: jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util EncodingUtil.java

mbecke      2003/07/15 05:46:33

  Modified:    httpclient/src/java/org/apache/commons/httpclient
                        HttpMethodBase.java
               httpclient/src/java/org/apache/commons/httpclient/methods
                        PostMethod.java
               httpclient/src/test/org/apache/commons/httpclient
                        TestMethodCharEncoding.java
  Added:       httpclient/src/java/org/apache/commons/httpclient/util
                        EncodingUtil.java
  Log:
  Changed query param encoding to UTF-8.
  Moved formUrlEncode() to a new EncodingUtil class.
  
  PR: 20481
  Submitted by: Michael Becke
  Reviewed by: Oleg Kalnichevski
  
  Revision  Changes    Path
  1.168     +17 -99    jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/HttpMethodBase.java
  
  Index: HttpMethodBase.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/HttpMethodBase.java,v
  retrieving revision 1.167
  retrieving revision 1.168
  diff -u -r1.167 -r1.168
  --- HttpMethodBase.java	15 Jul 2003 12:40:56 -0000	1.167
  +++ HttpMethodBase.java	15 Jul 2003 12:46:32 -0000	1.168
  @@ -68,7 +68,6 @@
   import java.io.IOException;
   import java.io.InputStream;
   import java.io.InterruptedIOException;
  -import java.util.BitSet;
   import java.util.HashSet;
   import java.util.Set;
   
  @@ -76,11 +75,11 @@
   import org.apache.commons.httpclient.auth.AuthenticationException;
   import org.apache.commons.httpclient.auth.HttpAuthenticator;
   import org.apache.commons.httpclient.auth.MalformedChallengeException;
  -import org.apache.commons.httpclient.cookie.MalformedCookieException;
   import org.apache.commons.httpclient.cookie.CookiePolicy;
   import org.apache.commons.httpclient.cookie.CookieSpec;
  +import org.apache.commons.httpclient.cookie.MalformedCookieException;
   import org.apache.commons.httpclient.protocol.Protocol;
  -import org.apache.commons.httpclient.util.URIUtil;
  +import org.apache.commons.httpclient.util.EncodingUtil;
   import org.apache.commons.logging.Log;
   import org.apache.commons.logging.LogFactory;
   
  @@ -160,32 +159,6 @@
           USER_AGENT = new Header("User-Agent", agent);
       }
   
  -    /**
  -     * BitSet of www-form-url safe characters.
  -     */
  -    protected static final BitSet WWW_FORM_URL = new BitSet(256);
  -
  -    // Static initializer for www_form_url
  -    static {
  -        // alpha characters
  -        for (int i = 'a'; i <= 'z'; i++) {
  -            WWW_FORM_URL.set(i);
  -        }
  -        for (int i = 'A'; i <= 'Z'; i++) {
  -            WWW_FORM_URL.set(i);
  -        }
  -        // numeric characters
  -        for (int i = '0'; i <= '9'; i++) {
  -            WWW_FORM_URL.set(i);
  -        }
  -        // blank to be replaced with +
  -        WWW_FORM_URL.set(' ');
  -        WWW_FORM_URL.set('-');
  -        WWW_FORM_URL.set('_');
  -        WWW_FORM_URL.set('.');
  -        WWW_FORM_URL.set('*');
  -    }
  -    
       // ----------------------------------------------------- Instance variables 
   
       /** My request headers, if any. */
  @@ -476,28 +449,32 @@
       }
   
       /**
  -     * Sets the query string.
  -     * The user must ensure that the string is properly URL encoded.
  -     * URIUtil.encodeAll, URIUtil.encodeWithinQuery or URIUtil.encodeQuery can
  -     * be used to encode parameter names and values.
  -     * The query string should not start with the question mark character.
  +     * Sets the query string. The user must ensure that the string is properly 
  +     * URL encoded. The query string should not start with the question mark character.
        *
        * @param queryString the query string
  +     * 
  +     * @see EncodingUtil#formUrlEncode(NameValuePair[], String)
        */
       public void setQueryString(String queryString) {
           this.queryString = queryString;
       }
   
       /**
  -     * Set my query string.
  +     * Sets the query string.  The pairs are encoded as UTF-8 characters.  To use
  +     * a different charset the parameters can be encoded manually using EncodingUtil 
  +     * and set as a single String.
        *
        * @param params an array of {@link NameValuePair}s to add as query string
        *        parameters. The name/value pairs will be automcatically 
        *        URL encoded
  +     * 
  +     * @see EncodingUtil#formUrlEncode(NameValuePair[], String)
  +     * @see #setQueryString(String)
        */
       public void setQueryString(NameValuePair[] params) {
           LOG.trace("enter HttpMethodBase.setQueryString(NameValuePair[])");
  -        queryString = formUrlEncode(params, HttpConstants.HTTP_ELEMENT_CHARSET);
  +        queryString = EncodingUtil.formUrlEncode(params, "UTF-8");
       }
   
       /**
  @@ -1719,65 +1696,6 @@
           buf.append(version);
           buf.append("\r\n");
           
  -        return buf.toString();
  -    }
  -    
  -    /**
  -     * @deprecated temporary method.  to be moved to commons Codec.
  -     * 
  -     * Form-urlencoding routine.
  -     *
  -     * The default encoding for all forms is `application/x-www-form-urlencoded'. 
  -     * A form data set is represented in this media type as follows:
  -     *
  -     * The form field names and values are escaped: space characters are replaced 
  -     * by `+', and then reserved characters are escaped as per [URL]; that is, 
  -     * non-alphanumeric characters are replaced by `%HH', a percent sign and two 
  -     * hexadecimal digits representing the ASCII code of the character. Line breaks, 
  -     * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
  -     * 
  -     * @param pairs the values to be encoded
  -     * @param charset the character set of pairs to be encoded
  -     * 
  -     * @return the urlencoded pairs
  -     * 
  -     * @since 2.0beta2
  -     */
  -    protected static String formUrlEncode(NameValuePair[] pairs, String charset) {
  -        
  -        StringBuffer buf = new StringBuffer();
  -        for (int i = 0; i < pairs.length; i++) {
  -            if (pairs[i].getName() != null) {
  -                if (i > 0) {
  -                    buf.append("&");
  -                }
  -                String queryName = pairs[i].getName();
  -                try {
  -                    queryName = URIUtil.encode(
  -                        queryName, 
  -                        WWW_FORM_URL, 
  -                        charset
  -                    ).replace(' ', '+');
  -                } catch (URIException urie) {
  -                    LOG.error("Error encoding pair name: " + queryName, urie);
  -                }
  -                buf.append(queryName);
  -                buf.append("=");
  -                if (pairs[i].getValue() != null) {
  -                    String queryValue = pairs[i].getValue();
  -                    try {
  -                        queryValue = URIUtil.encode(
  -                            queryValue, 
  -                            WWW_FORM_URL, 
  -                            charset
  -                        ).replace(' ', '+');
  -                    } catch (URIException urie) {
  -                        LOG.error("Error encoding pair value: " + queryValue, urie);
  -                    }
  -                    buf.append(queryValue);
  -                }
  -            }
  -        }
           return buf.toString();
       }
       
  
  
  
  1.47      +7 -6      jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/methods/PostMethod.java
  
  Index: PostMethod.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/methods/PostMethod.java,v
  retrieving revision 1.46
  retrieving revision 1.47
  diff -u -r1.46 -r1.47
  --- PostMethod.java	5 Jul 2003 18:43:04 -0000	1.46
  +++ PostMethod.java	15 Jul 2003 12:46:32 -0000	1.47
  @@ -71,6 +71,7 @@
   import org.apache.commons.httpclient.HttpException;
   import org.apache.commons.httpclient.HttpState;
   import org.apache.commons.httpclient.NameValuePair;
  +import org.apache.commons.httpclient.util.EncodingUtil;
   import org.apache.commons.logging.Log;
   import org.apache.commons.logging.LogFactory;
   
  @@ -98,7 +99,7 @@
    * @author <a href="mailto:remm@apache.org">Remy Maucherat</a>
    * @author <a href="mailto:dsale@us.britannica.com">Doug Sale</a>
    * @author <a href="mailto:jsdever@apache.org">Jeff Dever</a>
  - * @author Ortwin Glück
  + * @author Ortwin Gl�ck
    * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
    * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
    *
  @@ -205,7 +206,7 @@
       protected byte[] generateRequestBody() {
           LOG.trace("enter PostMethod.renerateRequestBody()");
           if (!this.params.isEmpty()) {
  -            String content = formUrlEncode(getParameters(), getRequestCharSet());
  +            String content = EncodingUtil.formUrlEncode(getParameters(), getRequestCharSet());
               return HttpConstants.getContentBytes(content);
           } else {
               return super.generateRequestBody();
  
  
  
  1.3       +28 -0     jakarta-commons/httpclient/src/test/org/apache/commons/httpclient/TestMethodCharEncoding.java
  
  Index: TestMethodCharEncoding.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/httpclient/src/test/org/apache/commons/httpclient/TestMethodCharEncoding.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TestMethodCharEncoding.java	19 Jun 2003 20:52:07 -0000	1.2
  +++ TestMethodCharEncoding.java	15 Jul 2003 12:46:33 -0000	1.3
  @@ -240,6 +240,34 @@
   
       }
   
  +    public void testQueryParams() throws IOException {
  +
  +        GetMethod get = new GetMethod("/");
  +
  +        String ru_msg = constructString(RUSSIAN_STUFF_UNICODE); 
  +        String ch_msg = constructString(SWISS_GERMAN_STUFF_UNICODE); 
  +
  +        get.setQueryString(new NameValuePair[] {
  +            new NameValuePair("ru", ru_msg),
  +            new NameValuePair("ch", ch_msg) 
  +        });            
  +
  +        Map params = new HashMap();
  +        StringTokenizer tokenizer = new StringTokenizer(
  +            get.getQueryString(), "&");
  +        while (tokenizer.hasMoreTokens()) {
  +            String s = tokenizer.nextToken();
  +            int i = s.indexOf('=');
  +            assertTrue("Invalid url-encoded parameters", i != -1);
  +            String name = s.substring(0, i).trim(); 
  +            String value = s.substring(i + 1, s.length()).trim(); 
  +            value = URIUtil.decode(value, CHARSET_UTF8);
  +            params.put(name, value);
  +        }
  +        assertEquals(ru_msg, params.get("ru"));
  +        assertEquals(ch_msg, params.get("ch"));
  +    }
  +
       public void testUrlEncodedRequestBody() throws IOException {
   
           PostMethod httppost = new PostMethod("/");
  
  
  
  1.1                  jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/EncodingUtil.java
  
  Index: EncodingUtil.java
  ===================================================================
  /*
   * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/EncodingUtil.java,v 1.1 2003/07/15 12:46:33 mbecke Exp $
   * $Revision: 1.1 $
   * $Date: 2003/07/15 12:46:33 $
   *
   * ====================================================================
   *
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999-2003 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Commons", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * [Additional notices, if required by prior licensing conditions]
   *
   */
  package org.apache.commons.httpclient.util;
  
  import java.util.BitSet;
  
  import org.apache.commons.httpclient.NameValuePair;
  import org.apache.commons.httpclient.URIException;
  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
  
  /**
   * The home for utility methods that handle various encoding tasks.
   * 
   * <p>All members are static; the class cannot be instantiated.</p>
   * 
   * @author Michael Becke
   * 
   * @since 2.0 final
   */
  public class EncodingUtil {
  
      /** Log object for this class. */
      private static final Log LOG = LogFactory.getLog(EncodingUtil.class);
  
      /**
       * BitSet of www-form-url safe characters: ASCII letters, ASCII digits,
       * <code>'-'</code>, <code>'_'</code>, <code>'.'</code>, <code>'*'</code>
       * and the blank. The blank is listed as "safe" only so that it passes
       * through {@link URIUtil#encode} untouched and can afterwards be turned
       * into <code>'+'</code> by {@link #formUrlEncode(NameValuePair[], String)}.
       */
      private static final BitSet WWW_FORM_URL = new BitSet(256);
  
      // Static initializer for www_form_url
      static {
          // alpha characters
          for (int i = 'a'; i <= 'z'; i++) {
              WWW_FORM_URL.set(i);
          }
          for (int i = 'A'; i <= 'Z'; i++) {
              WWW_FORM_URL.set(i);
          }
          // numeric characters
          for (int i = '0'; i <= '9'; i++) {
              WWW_FORM_URL.set(i);
          }
          // blank to be replaced with +
          WWW_FORM_URL.set(' ');
          WWW_FORM_URL.set('-');
          WWW_FORM_URL.set('_');
          WWW_FORM_URL.set('.');
          WWW_FORM_URL.set('*');
      }
      
      /**
       * Form-urlencoding routine.
       *
       * <p>Produces a string of the form <code>name1=value1&amp;name2=value2</code>
       * as used by the <code>application/x-www-form-urlencoded</code> media type.
       * Every name and value is passed through {@link URIUtil#encode} with the
       * www-form-url safe character set and the given charset, and blanks are
       * then replaced by <code>'+'</code>.</p>
       *
       * <ul>
       *   <li>Array entries that are <code>null</code>, or whose name is
       *       <code>null</code>, are skipped entirely.</li>
       *   <li>A pair whose value is <code>null</code> is written as
       *       <code>name=</code>.</li>
       *   <li>The <code>'&amp;'</code> separator is written only between pairs
       *       that are actually emitted, so skipped entries never leave a
       *       leading or doubled separator behind.</li>
       *   <li>If encoding a name or value fails, the error is logged and the
       *       unencoded text is written instead (best effort).</li>
       * </ul>
       * 
       * @param pairs the values to be encoded
       * @param charset the character set of pairs to be encoded
       * 
       * @return the urlencoded pairs
       * 
       * @since 2.0 final
       */
      public static String formUrlEncode(NameValuePair[] pairs, String charset) {
          
          StringBuffer buf = new StringBuffer();
          // Tracks whether a pair has already been written. The array index
          // cannot be used for this because skipped entries would otherwise
          // produce a stray leading "&".
          boolean pairWritten = false;
          for (int i = 0; i < pairs.length; i++) {
              NameValuePair pair = pairs[i];
              if (pair == null || pair.getName() == null) {
                  continue;
              }
              if (pairWritten) {
                  buf.append("&");
              }
              buf.append(encodeComponent(pair.getName(), charset, "name"));
              buf.append("=");
              String value = pair.getValue();
              if (value != null) {
                  buf.append(encodeComponent(value, charset, "value"));
              }
              pairWritten = true;
          }
          return buf.toString();
      }
      
      /**
       * Encodes a single name or value for use in a form-urlencoded string.
       * 
       * @param text the name or value to encode, never <code>null</code>
       * @param charset the character set used for encoding
       * @param role either <code>"name"</code> or <code>"value"</code>; used
       *        only to build the log message when encoding fails
       * 
       * @return the encoded text with blanks replaced by <code>'+'</code>, or
       *         <code>text</code> unchanged if encoding failed
       */
      private static String encodeComponent(String text, String charset, String role) {
          try {
              return URIUtil.encode(text, WWW_FORM_URL, charset).replace(' ', '+');
          } catch (URIException urie) {
              // Best effort: report the problem and fall back to the raw text
              LOG.error("Error encoding pair " + role + ": " + text, urie);
              return text;
          }
      }
      
      /**
       * This class should not be instantiated.
       */
      private EncodingUtil() {
      }
  
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util EncodingUtil.java

Posted by Michael Becke <be...@u.washington.edu>.
Hi Ortwin,

Sorry, it seems my machine is incapable of keeping the ü in your name:)

Mike

Ortwin Glück wrote:
> Excellent!
> 
> Although you are still messing with the encoding of the Source files :-) 
> Check my name: The ü is now a decomposed Unicode Character in UTF-8. 3 
> Bytes :-)
> 
> 
>>   - * @author Ortwin Glück
>>   + * @author Ortwin Gl�ck
> 
> 
> 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: commons-dev-help@jakarta.apache.org
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util EncodingUtil.java

Posted by Ortwin Glück <or...@nose.ch>.
Excellent!

Although you are still messing with the encoding of the Source files :-) 
Check my name: The ü is now a decomposed Unicode Character in UTF-8. 3 
Bytes :-)


>   - * @author Ortwin Glück
>   + * @author Ortwin Gl�ck



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org