You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by je...@apache.org on 2001/04/26 04:08:05 UTC

cvs commit: jakarta-slide/src/webdav/client/src/org/apache/webdav/util URIUtil.java

jericho     01/04/25 19:08:04

  Added:       src/webdav/client/src/org/apache/webdav/util URIUtil.java
  Log:
  - The new version of the URI escaping and unescaping class.
  - It's going to be integrated with the HttpURL and GenericURI classes.
  - So, the WebdavResource class needs to be modified.
  - NOTICE: It's experimental.
  
  Revision  Changes    Path
  1.1                  jakarta-slide/src/webdav/client/src/org/apache/webdav/util/URIUtil.java
  
  Index: URIUtil.java
  ===================================================================
  /*
   * $Header: /home/cvs/jakarta-slide/src/webdav/client/src/org/apache/webdav/util/URIUtil.java,v 1.1 2001/04/26 02:08:04 jericho Exp $
   * $Revision: 1.1 $
   * $Date: 2001/04/26 02:08:04 $
   *
   * ====================================================================
   *
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Tomcat", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * [Additional notices, if required by prior licensing conditions]
   *
   */
  
  package org.apache.webdav.util;
  
  import java.io.UnsupportedEncodingException;
  import java.io.ByteArrayOutputStream;
  import java.io.OutputStreamWriter;
  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.Map;
  import java.util.BitSet;
  
  
  /**
   * General purpose URI escaping and unescaping utility methods.
   *
   * <p>All methods are static and keep no state between calls. The only
   * shared data are the character sets declared below; the public ones are
   * mutable <code>BitSet</code> objects that callers may pass as the
   * <code>allowed</code> argument of the <code>escape</code> methods.</p>
   *
   * <p>NOTICE: The reserved character sets defined in this class are generic.
   * Using them to do URI escaping for a specific protocol is not
   * recommended.</p>
   *
   * @author Craig R. McClanahan
   * @author Tim Tye
   * @author Remy Maucherat
   * @author Park, Sung-Gu
   * @version $Revision: 1.1 $ $Date: 2001/04/26 02:08:04 $
   */

  public class URIUtil {

      // -------------------------------------------------------------- Constants


      /**
       * Array containing the ASCII expression for hexadecimal.
       * Indexed by nibble value (0-15); upper case digits are produced.
       */
      private static final char[] hexadecimal =
      {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       'A', 'B', 'C', 'D', 'E', 'F'};


      /**
       * The default encoding for URI characters.
       */
      private static final String defaultEnc = "UTF-8";


      // ------------------------------------------------------- Static Variables


      /**
       * Set containing the alphanum characters (a-z, A-Z, 0-9).
       * These characters are never escaped.
       */
      private static final BitSet alphanum;


      /**
       * Set containing the reserved characters of the scheme part.
       */
      public static BitSet schemeReserved;


      /**
       * Set containing the reserved characters of the authority part.
       */
      public static BitSet authorityReserved;


      /**
       * Set containing the reserved characters of the userinfo part.
       */
      public static BitSet userinfoReserved;


      /**
       * Set containing the reserved characters of the host part.
       */
      public static BitSet hostReserved;


      /**
       * Set containing the reserved characters of the path part.
       */
      public static BitSet pathReserved;


      /**
       * Set containing the reserved characters of the query.
       */
      public static BitSet queryReserved;


      // ----------------------------------------------------- Static Initializer


      static {

          // Save the alphanum characters that are common to all URI escaping.
          alphanum = new BitSet(128);
          for (int i = 'a'; i <= 'z'; i++) {
              alphanum.set(i);
          }
          for (int i = 'A'; i <= 'Z'; i++) {
              alphanum.set(i);
          }
          for (int i = '0'; i <= '9'; i++) {
              alphanum.set(i);
          }

          // Save the reserved characters within the scheme component.
          schemeReserved = new BitSet(128);
          /*
           * Actually, this should be any combination of lower case letters,
           * digits, plus ("+"), period ("."), or hyphen ("-").
           * The upper case letters should be treated as equivalent to lower
           * case in scheme names.
           */
          schemeReserved.set('+');
          schemeReserved.set('.');
          schemeReserved.set('-');

          // Save the reserved characters within the authority component.
          authorityReserved = new BitSet(128);
          authorityReserved.set(';');
          authorityReserved.set(':');
          authorityReserved.set('@');
          authorityReserved.set('?');
          authorityReserved.set('/');

          // Save the reserved characters within the userinfo component.
          userinfoReserved = new BitSet(128);
          userinfoReserved.set(';');
          userinfoReserved.set(':');
          userinfoReserved.set('&');
          userinfoReserved.set('=');
          userinfoReserved.set('+');
          userinfoReserved.set('$');
          userinfoReserved.set(',');

          // Save the reserved characters within the host component.
          hostReserved = new BitSet(128);
          hostReserved.set('.');
          hostReserved.set('-');

          // Save the reserved characters within the path component.
          pathReserved = new BitSet(128);
          pathReserved.set('/');
          pathReserved.set(';');
          pathReserved.set('=');
          pathReserved.set('?');

          // Save the reserved characters within the query component.
          queryReserved = new BitSet(128);
          queryReserved.set(';');
          queryReserved.set('/');
          queryReserved.set('?');
          queryReserved.set(':');
          queryReserved.set('@');
          queryReserved.set('&');
          queryReserved.set('=');
          queryReserved.set('+');
          queryReserved.set(',');
          queryReserved.set('$');

      }


      // -------------------------------------------------------- Private Methods


      /**
       * Convert a byte character value to its hexadecimal digit value.
       *
       * @param b the character value byte ('0'-'9', 'a'-'f' or 'A'-'F')
       * @return the value of the digit, in the range 0 to 15
       * @exception IllegalArgumentException if the byte is not a hexadecimal
       * digit
       */
      private static byte convertHexDigit(byte b) {
          if ((b >= '0') && (b <= '9')) return (byte)(b - '0');
          if ((b >= 'a') && (b <= 'f')) return (byte)(b - 'a' + 10);
          if ((b >= 'A') && (b <= 'F')) return (byte)(b - 'A' + 10);
          // Reject the digit instead of silently decoding it as 0, so that
          // the contract documented on the unescape methods actually holds.
          throw new IllegalArgumentException
              ("Invalid hexadecimal digit in URI escape sequence: 0x"
               + Integer.toHexString(b & 0xff));
      }


      // --------------------------------------------------------- Public Methods


      /**
       * Unescape and return the specified URI-escaped String, using the
       * default encoding (UTF-8) rather than the system default character
       * encoding to convert the unescaped bytes to a string.
       *
       * @param str The uri-escaped string
       * @return the unescaped string, or null if the given string is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(String str) {
          return unescape(str, defaultEnc);
      }


      /**
       * Unescape and return the specified URI-escaped String.
       *
       * The string is first converted to bytes with the given encoding, so
       * that characters which were left unescaped are decoded with the same
       * encoding as the escaped ones.  The encoding is expected to be
       * ASCII-compatible, because '%', '+' and the hexadecimal digits are
       * recognized by their ASCII byte values.
       *
       * @param str The uri-escaped string.
       * @param enc The encoding to use.  If it is null or not supported, the
       * system default character encoding is used instead.
       * @return the unescaped string, or null if the given string is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(String str, String enc) {

          if (str == null)
              return (null);

          byte[] bytes = null;
          if (enc != null) {
              try {
                  bytes = str.getBytes(enc);
              } catch (UnsupportedEncodingException e) {
                  // Fall back to the system default below.  The problem is
                  // reported by the byte array variant called at the end.
              }
          }
          if (bytes == null)
              bytes = str.getBytes();

          return unescape(bytes, enc);

      }


      /**
       * Unescape and return the specified URI-escaped byte array, using the
       * default encoding (UTF-8).
       *
       * @param bytes The uri-escaped byte array
       * @return the unescaped string, or null if the given array is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(byte[] bytes) {
          return unescape(bytes, defaultEnc);
      }


      /**
       * Unescape and return the specified URI-escaped byte array.
       *
       * Each "%XX" sequence is replaced by the byte it denotes, and each '+'
       * is replaced by a space.  Note that, as a consequence, a literal '+'
       * left unescaped by <code>escape</code> (because it was part of the
       * allowed set) comes back as a space.  The given array is not modified.
       *
       * @param bytes The uri-escaped byte array
       * @param enc The encoding to use.  If it is null or not supported, the
       * system default character encoding is used instead.
       * @return the unescaped string, or null if the given array is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(byte[] bytes, String enc) {

          if (bytes == null)
              return (null);

          int len = bytes.length;
          // Decode into a separate buffer so that the caller's array is left
          // untouched.  The output is never longer than the input.
          byte[] decoded = new byte[len];
          int ix = 0;
          int ox = 0;
          while (ix < len) {
              byte b = bytes[ix++];     // Get byte to test
              if (b == '+') {
                  b = (byte)' ';
              } else if (b == '%') {
                  // Both hexadecimal digits must be present.
                  if (ix + 2 > len) {
                      throw new IllegalArgumentException
                          ("Incomplete URI escape sequence: '%' must be "
                           + "followed by two hexadecimal digits");
                  }
                  int high = convertHexDigit(bytes[ix++]);
                  int low = convertHexDigit(bytes[ix++]);
                  b = (byte) ((high << 4) + low);
              }
              decoded[ox++] = b;
          }
          if (enc != null) {
              try {
                  return new String(decoded, 0, ox, enc);
              } catch (UnsupportedEncodingException e) {
                  // Best effort: report the problem and fall back to the
                  // system default character encoding below.
                  e.printStackTrace();
              }
          }
          return new String(decoded, 0, ox);

      }


      /**
       * URI rewriter, using the default encoding (UTF-8) and escaping every
       * character that is not alphanumeric.
       *
       * @param str The string which has to be rewritten
       * @return the escaped string, or null if the given string is null
       */
      public static String escape(String str) {
          return escape(str, defaultEnc);
      }


      /**
       * URI rewriter, escaping every character that is not alphanumeric.
       *
       * @param str The string which has to be rewritten.
       * @param enc The encoding to use.
       * @return the escaped string, or null if the given string is null
       */
      public static String escape(String str, String enc) {
          return escape(str, enc, null);
      }


      /**
       * URI rewriter, using the default encoding (UTF-8).
       *
       * @param str The string which has to be rewritten.
       * @param allowed The additional allowed characters not to escape.
       * @return the escaped string, or null if the given string is null
       */
      public static String escape(String str, BitSet allowed) {
          return escape(str, defaultEnc, allowed);
      }


      /**
       * URI rewriter.
       *
       * Alphanumeric characters and the characters contained in the allowed
       * set are copied unchanged.  Every other character is converted to
       * bytes with the given encoding, and each of those bytes is written as
       * '%' followed by two upper case hexadecimal digits.
       *
       * @param str The string which has to be rewritten.
       * @param enc The encoding to use.  If it is null or not supported, the
       * system default character encoding is used instead.
       * @param allowed The additional allowed characters not to escape, or
       * null if there are none.
       * @return the escaped string, or null if the given string is null
       */
      public static String escape(String str, String enc, BitSet allowed) {

          if (str == null)
              return (null);

          int maxBytesPerChar = 10;
          StringBuffer rewrittenStr = new StringBuffer(str.length());
          ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
          OutputStreamWriter writer = null;
          if (enc != null) {
              try {
                  // The same encoding as the one specified above should be
                  // used.
                  writer = new OutputStreamWriter(buf, enc);
              } catch (UnsupportedEncodingException e) {
                  // Best effort: report the problem and fall back to the
                  // system default character encoding below.
                  e.printStackTrace();
              }
          }
          if (writer == null) {
              writer = new OutputStreamWriter(buf);
          }

          for (int i = 0; i < str.length(); i++) {
              int c = (int) str.charAt(i);
              if (alphanum.get(c)) {
                  rewrittenStr.append((char)c);
              } else if (allowed != null && allowed.get(c)) {
                  rewrittenStr.append((char)c);
              } else {
                  // convert to external encoding before hex conversion
                  try {
                      writer.write(c);
                      writer.flush();
                  } catch(IOException e) {
                      // The character could not be converted; drop it and
                      // go on with the rest of the string.
                      buf.reset();
                      continue;
                  }
                  byte[] ba = buf.toByteArray();
                  for (int j = 0; j < ba.length; j++) {
                      // Converting each byte in the buffer
                      byte toEscape = ba[j];
                      rewrittenStr.append('%');
                      int low = (int) (toEscape & 0x0f);
                      int high = (int) ((toEscape & 0xf0) >> 4);
                      rewrittenStr.append(hexadecimal[high]);
                      rewrittenStr.append(hexadecimal[low]);
                  }
                  buf.reset();
              }
          }

          return rewrittenStr.toString();

      }

  }