You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@avalon.apache.org by br...@apache.org on 2003/06/07 20:28:37 UTC
cvs commit: avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source SourceUtil.java
bruno 2003/06/07 11:28:37
Modified: sourceresolve/src/java/org/apache/excalibur/source
SourceUtil.java
Log:
Added method for decoding URL encoded characters (%HH) as UTF-8.
Revision Changes Path
1.8 +56 -7 avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source/SourceUtil.java
Index: SourceUtil.java
===================================================================
RCS file: /home/cvs/avalon-excalibur/sourceresolve/src/java/org/apache/excalibur/source/SourceUtil.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- SourceUtil.java 20 May 2003 20:56:43 -0000 1.7
+++ SourceUtil.java 7 Jun 2003 18:28:36 -0000 1.8
@@ -54,12 +54,7 @@
*/
package org.apache.excalibur.source;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
+import java.io.*;
import java.util.BitSet;
import java.util.Iterator;
@@ -509,6 +504,60 @@
in.close();
out.flush();
out.close();
+ }
+
+ /**
+ * Decode a path.
+ *
+ * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
+ * <p>The validity of the input path is not checked (i.e. characters that
+ * were not encoded will not be reported as errors).
+ * <p>This method differs from URLDecoder.decode in that it always uses UTF-8
+ * (while URLDecoder uses the platform default encoding, often ISO-8859-1),
+ * and doesn't translate + characters to spaces.
+ *
+ * @param path the path to decode
+ * @return the decoded path
+ */
+ public static String decodePath(String path) {
+ StringBuffer translatedPath = new StringBuffer(path.length());
+ byte[] encodedchars = new byte[path.length() / 3];
+ int i = 0;
+ int length = path.length();
+ int encodedcharsLength = 0;
+ while (i < length) {
+ if (path.charAt(i) == '%') {
+ // we must process all consecutive %-encoded characters in one go, because they represent
+ // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
+ while (i < length && path.charAt(i) == '%') {
+ if (i + 2 < length) {
+ try {
+ byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
+ encodedchars[encodedcharsLength] = x;
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
+ }
+ encodedcharsLength++;
+ i += 3;
+ } else {
+ throw new IllegalArgumentException("% character should be followed by 2 hexadecimal characters.");
+ }
+ }
+ try {
+ String translatedPart = new String(encodedchars, 0, encodedcharsLength, "UTF-8");
+ translatedPath.append(translatedPart);
+ } catch (UnsupportedEncodingException e) {
+ // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
+ throw new RuntimeException("Problem in decodePath: UTF-8 encoding not supported.");
+ }
+ encodedcharsLength = 0;
+ } else {
+ // a normal character
+ translatedPath.append(path.charAt(i));
+ i++;
+ }
+ }
+ return translatedPath.toString();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: cvs-unsubscribe@avalon.apache.org
For additional commands, e-mail: cvs-help@avalon.apache.org