You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by br...@apache.org on 2003/05/23 15:08:37 UTC
cvs commit: cocoon-2.1/src/java/org/apache/cocoon/util NetUtils.java
bruno 2003/05/23 06:08:37
Modified: src/java/org/apache/cocoon/util NetUtils.java
Log:
Improved implementation of decodePath
Revision Changes Path
1.2 +46 -8 cocoon-2.1/src/java/org/apache/cocoon/util/NetUtils.java
Index: NetUtils.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/java/org/apache/cocoon/util/NetUtils.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- NetUtils.java 9 Mar 2003 00:09:43 -0000 1.1
+++ NetUtils.java 23 May 2003 13:08:35 -0000 1.2
@@ -53,6 +53,7 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.util.Enumeration;
@@ -122,19 +123,56 @@
}
/**
- * Decode a path
+ * Decode a path.
+ *
+ * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
+ * <p>The validity of the input path is not checked (i.e. characters that were not encoded will
+ * not be reported as errors).
+ * <p>This method differs from URLDecoder.decode in that it always uses UTF-8 (while URLDecoder
+ * uses the platform default encoding, often ISO-8859-1), and doesn't translate + characters to spaces.
*
* @param path the path to decode
* @return the decoded path
*/
public static String decodePath(String path) {
- // VG: JDK1.2 MEthods throws an exception; JDK1.3 - not.
- // http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html#decode(java.lang.String)
- try {
- return java.net.URLDecoder.decode( path );
- } catch (Exception e) {
- return path;
+ StringBuffer translatedPath = new StringBuffer(path.length());
+ byte[] encodedchars = new byte[path.length() / 3];
+ int i = 0;
+ int length = path.length();
+ int encodedcharsLength = 0;
+ while (i < length) {
+ if (path.charAt(i) == '%') {
+ // we must process all consecutive %-encoded characters in one go, because they represent
+ // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
+ while (i < length && path.charAt(i) == '%') {
+ if (i + 2 < length) {
+ try {
+ byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
+ encodedchars[encodedcharsLength] = x;
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("NetUtils.decodePath: illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
+ }
+ encodedcharsLength++;
+ i += 3;
+ } else {
+ throw new IllegalArgumentException("NetUtils.decodePath: % character should be followed by 2 hexadecimal characters.");
+ }
+ }
+ try {
+ String translatedPart = new String(encodedchars, 0, encodedcharsLength, "UTF-8");
+ translatedPath.append(translatedPart);
+ } catch (UnsupportedEncodingException e) {
+ // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
+ throw new RuntimeException("Problem in decodePath: UTF-8 encoding not supported.");
+ }
+ encodedcharsLength = 0;
+ } else {
+ // a normal character
+ translatedPath.append(path.charAt(i));
+ i++;
+ }
}
+ return translatedPath.toString();
}
/**
Re: cvs commit: cocoon-2.1/src/java/org/apache/cocoon/util NetUtils.java
Posted by Vadim Gritsenko <va...@verizon.net>.
Bruno Dumon wrote:
>On Fri, 2003-05-23 at 15:33, Vadim Gritsenko wrote:
>
>
>>bruno@apache.org wrote:
>>
>>
>>
>[..]
>
>
>>Why is it normal?
>>
>>http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html:
>> "The plus sign '+' is converted into a space character ' '"
>>
>>
>>
>
>It should not be converted, that is the problem. Or at least that is
>what I understood.
>
>The URLDecoder decodes "application/x-www-form-urlencoded"-encoded
>data, an encoding that is used inside the HTTP body of a POST request,
>but not in URLs themselves.
>
I just tested "test+test.html", and the reference Apache installation (on
www.a.o ;) did not pick up "test test.html". So you are right, and
shame on me!
Vadim
Re: cvs commit: cocoon-2.1/src/java/org/apache/cocoon/util
NetUtils.java
Posted by Bruno Dumon <br...@outerthought.org>.
On Fri, 2003-05-23 at 15:33, Vadim Gritsenko wrote:
> bruno@apache.org wrote:
>
[..]
> Why is it normal?
>
> http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html:
> "The plus sign '+' is converted into a space character ' '"
>
It should not be converted, that is the problem. Or at least that is
what I understood.
The URLDecoder decodes "application/x-www-form-urlencoded"-encoded
data, an encoding that is used inside the HTTP body of a POST request,
but not in URLs themselves.
--
Bruno Dumon http://outerthought.org/
Outerthought - Open Source, Java & XML Competence Support Center
bruno@outerthought.org bruno@apache.org
Re: cvs commit: cocoon-2.1/src/java/org/apache/cocoon/util NetUtils.java
Posted by Vadim Gritsenko <va...@verizon.net>.
bruno@apache.org wrote:
>bruno 2003/05/23 06:08:37
>
>
...
> /**
> - * Decode a path
> + * Decode a path.
> + *
> + * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
> + * <p>The validity of the input path is not checked (i.e. characters that were not encoded will
> + * not be reported as errors).
> + * <p>This method differs from URLDecoder.decode in that it always uses UTF-8 (while URLDecoder
> + * uses the platform default encoding, often ISO-8859-1), and doesn't translate + characters to spaces.
> *
> * @param path the path to decode
> * @return the decoded path
> */
> public static String decodePath(String path) {
> - // VG: JDK1.2 MEthods throws an exception; JDK1.3 - not.
> - // http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html#decode(java.lang.String)
> - try {
> - return java.net.URLDecoder.decode( path );
> - } catch (Exception e) {
> - return path;
> + StringBuffer translatedPath = new StringBuffer(path.length());
> + byte[] encodedchars = new byte[path.length() / 3];
> + int i = 0;
> + int length = path.length();
> + int encodedcharsLength = 0;
> + while (i < length) {
> + if (path.charAt(i) == '%') {
> + // we must process all consecutive %-encoded characters in one go, because they represent
> + // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
> + while (i < length && path.charAt(i) == '%') {
> + if (i + 2 < length) {
> + try {
> + byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
> + encodedchars[encodedcharsLength] = x;
> + } catch (NumberFormatException e) {
> + throw new IllegalArgumentException("NetUtils.decodePath: illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
> + }
> + encodedcharsLength++;
> + i += 3;
> + } else {
> + throw new IllegalArgumentException("NetUtils.decodePath: % character should be followed by 2 hexadecimal characters.");
> + }
> + }
> + try {
> + String translatedPart = new String(encodedchars, 0, encodedcharsLength, "UTF-8");
> + translatedPath.append(translatedPart);
> + } catch (UnsupportedEncodingException e) {
> + // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
> + throw new RuntimeException("Problem in decodePath: UTF-8 encoding not supported.");
> + }
> + encodedcharsLength = 0;
> + } else {
> + // a normal character
> + translatedPath.append(path.charAt(i));
>
Why is it normal?
http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html:
"The plus sign '+' is converted into a space character ' '"
Vadim
Re: cvs commit: cocoon-2.1/src/java/org/apache/cocoon/util NetUtils.java
Posted by Vadim Gritsenko <va...@verizon.net>.
bruno@apache.org wrote:
>bruno 2003/05/23 06:08:37
>
>
...
> /**
> - * Decode a path
> + * Decode a path.
> + *
> + * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
> + * <p>The validity of the input path is not checked (i.e. characters that were not encoded will
> + * not be reported as errors).
> + * <p>This method differs from URLDecoder.decode in that it always uses UTF-8 (while URLDecoder
> + * uses the platform default encoding, often ISO-8859-1), and doesn't translate + characters to spaces.
> *
> * @param path the path to decode
> * @return the decoded path
> */
> public static String decodePath(String path) {
> - // VG: JDK1.2 MEthods throws an exception; JDK1.3 - not.
> - // http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html#decode(java.lang.String)
> - try {
> - return java.net.URLDecoder.decode( path );
> - } catch (Exception e) {
> - return path;
> + StringBuffer translatedPath = new StringBuffer(path.length());
> + byte[] encodedchars = new byte[path.length() / 3];
> + int i = 0;
> + int length = path.length();
> + int encodedcharsLength = 0;
> + while (i < length) {
> + if (path.charAt(i) == '%') {
> + // we must process all consecutive %-encoded characters in one go, because they represent
> + // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
> + while (i < length && path.charAt(i) == '%') {
> + if (i + 2 < length) {
> + try {
> + byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
> + encodedchars[encodedcharsLength] = x;
> + } catch (NumberFormatException e) {
> + throw new IllegalArgumentException("NetUtils.decodePath: illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
> + }
> + encodedcharsLength++;
> + i += 3;
> + } else {
> + throw new IllegalArgumentException("NetUtils.decodePath: % character should be followed by 2 hexadecimal characters.");
> + }
> + }
> + try {
> + String translatedPart = new String(encodedchars, 0, encodedcharsLength, "UTF-8");
> + translatedPath.append(translatedPart);
> + } catch (UnsupportedEncodingException e) {
> + // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
> + throw new RuntimeException("Problem in decodePath: UTF-8 encoding not supported.");
> + }
> + encodedcharsLength = 0;
> + } else {
> + // a normal character
> + translatedPath.append(path.charAt(i));
>
Why is it normal?
http://java.sun.com/products/jdk/1.2/docs/api/java/net/URLDecoder.html:
"The plus sign '+' is converted into a space character ' '"
Vadim