You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@myfaces.apache.org by lu...@apache.org on 2009/05/28 04:36:25 UTC
svn commit: r779412 - in /myfaces/shared/trunk_3.0.x/core/src:
main/java/org/apache/myfaces/shared/renderkit/html/
main/java/org/apache/myfaces/shared/renderkit/html/util/
test/java/org/apache/myfaces/shared/renderkit/html/util/
Author: lu4242
Date: Thu May 28 02:36:25 2009
New Revision: 779412
URL: http://svn.apache.org/viewvc?rev=779412&view=rev
Log:
MYFACES-1841 HtmlResponseWriterImpl.writeURIAttribute does not perform proper URL encoding (e.g. & should be encoded as &amp;)
Modified:
myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/HtmlResponseWriterImpl.java
myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoder.java
myfaces/shared/trunk_3.0.x/core/src/test/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoderTest.java
Modified: myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/HtmlResponseWriterImpl.java
URL: http://svn.apache.org/viewvc/myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/HtmlResponseWriterImpl.java?rev=779412&r1=779411&r2=779412&view=diff
==============================================================================
--- myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/HtmlResponseWriterImpl.java (original)
+++ myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/HtmlResponseWriterImpl.java Thu May 28 02:36:25 2009
@@ -385,7 +385,8 @@
}
}
*/
- _writer.write(strValue);
+ //_writer.write(strValue);
+ _writer.write(org.apache.myfaces.shared.renderkit.html.util.HTMLEncoder.encodeURIAtributte(strValue, _characterEncoding));
}
_writer.write('"');
}
Modified: myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoder.java
URL: http://svn.apache.org/viewvc/myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoder.java?rev=779412&r1=779411&r2=779412&view=diff
==============================================================================
--- myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoder.java (original)
+++ myfaces/shared/trunk_3.0.x/core/src/main/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoder.java Thu May 28 02:36:25 2009
@@ -18,7 +18,9 @@
*/
package org.apache.myfaces.shared.renderkit.html.util;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.io.Writer;
/**
@@ -274,4 +276,394 @@
writer.write(sb.toString());
}
}
+
+ // Upper-case hexadecimal digits, looked up by nibble value when building "%XX" escapes.
+ private static final String HEX_CHARSET = "0123456789ABCDEF";
+
+ // Charset name handed to percentEncode for characters outside the query/fragment part.
+ private static final String UTF8 = "UTF-8";
+
+ /**
+  * Encodes a URI so that it can be written as the value of an HTML attribute.
+  * <p>
+  * The characters excluded by RFC 2396 section 2.4.3 are percent-encoded, while
+  * the reserved characters (RFC 3986 section 2.2, including "[", "]" and "#")
+  * and the unreserved characters (RFC 3986 section 2.3) are left untouched.
+  * <p>
+  * In the part before the first '?' or '#' (scheme, authority and path) the
+  * following characters are percent-encoded, always using UTF-8 as RFC 3986
+  * section 3.2.2 and the HTML 4 notes on non-ASCII characters in URI attribute
+  * values recommend:
+  * <ul>
+  * <li>%00 to %20 (controls and space) and everything from %7F upwards</li>
+  * <li>the double quote %22, "&lt;" %3C and "&gt;" %3E</li>
+  * <li>"\" %5C, "^" %5E and "`" %60</li>
+  * <li>"{" %7B, "|" %7C and "}" %7D</li>
+  * <li>"%" %25, unless it is followed by two hexadecimal digits; in that case
+  *     it is assumed to start an existing escape and is kept, so that a value
+  *     is not encoded twice</li>
+  * </ul>
+  * Everything after the first '?' or '#' is data that may be read back through
+  * the request parameters, so it is delegated to <code>encodeURIQuery</code>,
+  * which percent-encodes with the document character encoding.
+  *
+  * @param string the URI to encode; must not be null
+  * @param characterEncoding the document character encoding, used for the
+  *        part that follows the first '?' or '#'
+  * @return the encoded URI; the very same instance as <code>string</code>
+  *         when no character had to be replaced
+  * @throws IOException never thrown by this implementation; the clause is
+  *         kept so that existing callers keep compiling
+  */
+ public static String encodeURIAtributte(final String string, final String characterEncoding)
+     throws IOException
+ {
+     StringBuilder sb = null; // created on demand, only once something must be replaced
+     final int length = string.length();
+     for (int i = 0; i < length; ++i)
+     {
+         final char c = string.charAt(i);
+         String app = null;       // replacement for c, or null to copy c unchanged
+         boolean endLoop = false; // true once the rest of the URI has been consumed
+
+         if (c <= 0x20 || c >= 0x7F ||
+             c == '"' || c == '<' || c == '>' ||
+             c == '\\' || c == '^' || c == '`' ||
+             c == '{' || c == '|' || c == '}')
+         {
+             app = percentEncode(c, UTF8);
+         }
+         else if (c == '%')
+         {
+             // A '%' followed by two hex digits (RFC 3986 section 2.1, either case)
+             // is treated as an existing escape and must not be encoded again.
+             boolean alreadyEncoded = false;
+             if (i + 2 < length)
+             {
+                 final char h1 = string.charAt(i + 1);
+                 final char h2 = string.charAt(i + 2);
+                 alreadyEncoded =
+                     ((h1 >= '0' && h1 <= '9') || (h1 >= 'A' && h1 <= 'F') || (h1 >= 'a' && h1 <= 'f')) &&
+                     ((h2 >= '0' && h2 <= '9') || (h2 >= 'A' && h2 <= 'F') || (h2 >= 'a' && h2 <= 'f'));
+             }
+             if (!alreadyEncoded)
+             {
+                 app = percentEncode(c, UTF8);
+             }
+         }
+         else if ((c == '?' || c == '#') && i + 1 < length)
+         {
+             // The remainder is query/fragment data: encode it in one go with the
+             // document character encoding and stop scanning.
+             app = c + encodeURIQuery(string.substring(i + 1), characterEncoding);
+             endLoop = true;
+         }
+
+         if (app != null)
+         {
+             if (sb == null)
+             {
+                 // first replacement: everything before i was copied verbatim
+                 sb = new StringBuilder(string.substring(0, i));
+             }
+             sb.append(app);
+         }
+         else if (sb != null)
+         {
+             sb.append(c);
+         }
+         if (endLoop)
+         {
+             break;
+         }
+     }
+     return sb == null ? string : sb.toString();
+ }
+
+ /**
+  * Percent-encodes a single character.
+  * <p>
+  * Characters above 0x7F are delegated to
+  * <code>percentEncodeNonUsAsciiCharacter</code>, which encodes them with the
+  * given character encoding. US-ASCII characters (0x00-0x7F) are a single
+  * octet and are turned directly into "%" followed by two hex digits.
+  *
+  * @param c the character to encode
+  * @param characterEncoding the encoding used for characters above 0x7F
+  * @return the percent-encoded form of <code>c</code>
+  */
+ private static String percentEncode(char c, String characterEncoding)
+ {
+     if (c > 0x7F)
+     {
+         return percentEncodeNonUsAsciiCharacter(c, characterEncoding);
+     }
+     final int highNibble = (c >> 4) & 0x0F;
+     final int lowNibble = c & 0x0F;
+     return "%" + HEX_CHARSET.charAt(highNibble) + HEX_CHARSET.charAt(lowNibble);
+ }
+
+ /**
+  * Percent-encodes a single character by converting it to octets in the given
+  * character encoding and writing every octet as "%" plus two hex digits.
+  * <p>
+  * When the encoding is null or not supported, UTF-8 is used instead. The
+  * previous behaviour of returning null made callers emit the raw, unencoded
+  * character into the URI.
+  *
+  * @param c the character to encode
+  * @param characterEncoding name of the charset used to obtain the octets;
+  *        null or an unsupported name falls back to UTF-8
+  * @return the percent-encoded octets of <code>c</code>, never null
+  */
+ private static String percentEncodeNonUsAsciiCharacter(char c, String characterEncoding)
+ {
+     final String text = String.valueOf(c);
+     byte[] octets;
+     try
+     {
+         octets = text.getBytes(characterEncoding == null ? UTF8 : characterEncoding);
+     }
+     catch (java.io.UnsupportedEncodingException e)
+     {
+         try
+         {
+             octets = text.getBytes(UTF8);
+         }
+         catch (java.io.UnsupportedEncodingException impossible)
+         {
+             // every Java platform is required to support UTF-8
+             throw new IllegalStateException("UTF-8 charset is not available", impossible);
+         }
+     }
+
+     final StringBuilder builder = new StringBuilder(octets.length * 3);
+     for (int i = 0; i < octets.length; i++)
+     {
+         final int octet = octets[i] & 0xFF; // view the signed byte as 0..255
+         builder.append('%');
+         builder.append(HEX_CHARSET.charAt(octet >> 4));
+         builder.append(HEX_CHARSET.charAt(octet & 0x0F));
+     }
+     return builder.toString();
+ }
+
+ /**
+  * Encodes the part of a URI that follows the first '?' or '#', using the
+  * document character encoding for percent-encoding.
+  * <p>
+  * The document encoding is used because these values may be read back
+  * through the request parameters, which are decoded with the current
+  * character encoding. The following characters are replaced:
+  * <ul>
+  * <li>%00 to %20 (a space becomes %20, not "+") and everything from %7F
+  *     upwards, as many octets per character as the encoding requires</li>
+  * <li>the double quote %22, "&lt;" %3C and "&gt;" %3E</li>
+  * <li>"\" %5C, "^" %5E and "`" %60</li>
+  * <li>"{" %7B, "|" %7C and "}" %7D</li>
+  * <li>"%" %25, unless it is followed by two hexadecimal digits (an existing
+  *     escape that must not be encoded twice)</li>
+  * <li>an ampersand becomes {@code &amp;}, because the link is written inside
+  *     an HTML page where a bare ampersand is invalid; an ampersand that
+  *     already starts {@code &amp;} is kept as it is</li>
+  * </ul>
+  *
+  * @param string the text after the first '?' or '#'; must not be null
+  * @param characterEncoding the document character encoding
+  * @return the encoded text; the very same instance as <code>string</code>
+  *         when no character had to be replaced
+  */
+ private static String encodeURIQuery(final String string, final String characterEncoding)
+ {
+     StringBuilder sb = null; // created on demand, only once something must be replaced
+     final int length = string.length();
+     for (int i = 0; i < length; ++i)
+     {
+         final char c = string.charAt(i);
+         String app = null; // replacement for c, or null to copy c unchanged
+
+         if (c <= 0x20 || c >= 0x7F ||
+             c == '"' || c == '<' || c == '>' ||
+             c == '\\' || c == '^' || c == '`' ||
+             c == '{' || c == '|' || c == '}')
+         {
+             // query data is encoded with the document character encoding
+             app = percentEncode(c, characterEncoding);
+         }
+         else if (c == '%')
+         {
+             // A '%' followed by two hex digits (RFC 3986 section 2.1, either case)
+             // is treated as an existing escape and must not be encoded again.
+             boolean alreadyEncoded = false;
+             if (i + 2 < length)
+             {
+                 final char h1 = string.charAt(i + 1);
+                 final char h2 = string.charAt(i + 2);
+                 alreadyEncoded =
+                     ((h1 >= '0' && h1 <= '9') || (h1 >= 'A' && h1 <= 'F') || (h1 >= 'a' && h1 <= 'f')) &&
+                     ((h2 >= '0' && h2 <= '9') || (h2 >= 'A' && h2 <= 'F') || (h2 >= 'a' && h2 <= 'f'));
+             }
+             if (!alreadyEncoded)
+             {
+                 app = percentEncode(c, characterEncoding);
+             }
+         }
+         else if (c == '&')
+         {
+             // Escape the ampersand for HTML unless it is already written as "&amp;".
+             if (!string.startsWith("amp;", i + 1))
+             {
+                 app = "&amp;";
+             }
+         }
+
+         if (app != null)
+         {
+             if (sb == null)
+             {
+                 // first replacement: everything before i was copied verbatim
+                 sb = new StringBuilder(string.substring(0, i));
+             }
+             sb.append(app);
+         }
+         else if (sb != null)
+         {
+             sb.append(c);
+         }
+     }
+     return sb == null ? string : sb.toString();
+ }
}
Modified: myfaces/shared/trunk_3.0.x/core/src/test/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoderTest.java
URL: http://svn.apache.org/viewvc/myfaces/shared/trunk_3.0.x/core/src/test/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoderTest.java?rev=779412&r1=779411&r2=779412&view=diff
==============================================================================
--- myfaces/shared/trunk_3.0.x/core/src/test/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoderTest.java (original)
+++ myfaces/shared/trunk_3.0.x/core/src/test/java/org/apache/myfaces/shared/renderkit/html/util/HTMLEncoderTest.java Thu May 28 02:36:25 2009
@@ -16,9 +16,13 @@
package org.apache.myfaces.shared.renderkit.html.util;
+import java.io.ByteArrayOutputStream;
import java.io.CharArrayWriter;
import java.io.IOException;
-import java.io.Writer;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
import org.apache.shale.test.base.AbstractJsfTestCase;
@@ -236,4 +240,105 @@
assertEquals(expected[i], actual[i]);
}
}
+
+ /**
+  * The ampersand separating two query parameters must be written as the
+  * HTML entity, because the URI ends up inside an HTML attribute. The rest
+  * of this plain URI must stay unchanged.
+  */
+ public void testSimpleWriteURIAttribute() throws Exception
+ {
+     String cad1 = "http://myfaces.apache.org/hello.jsf?key1=val&key2=val2#id";
+     String cad2 = "http://myfaces.apache.org/hello.jsf?key1=val&amp;key2=val2#id";
+     String cad3 = HTMLEncoder.encodeURIAtributte(cad1,"UTF-8");
+     assertEquals(cad2, cad3);
+ }
+
+ /**
+  * US-ASCII characters that must always be percent-encoded, both after and
+  * before the '?': the double quote %22, "%" %25 (here not followed by two
+  * hex digits), "<" %3C, ">" %3E, "\" %5C, "`" %60, "{" %7B, "|" %7C,
+  * "}" %7D, "^" %5E, line feed %0A and space %20.
+  */
+ public void testUsAsciiEscapedCharactersBeforeQuery() throws Exception
+ {
+     final String unsafe = "\"%<>\\`{|}^\n ";
+     final String escaped = "%22%25%3C%3E%5C%60%7B%7C%7D%5E%0A%20";
+
+     // inside the query part
+     assertEquals("?key=" + escaped,
+         HTMLEncoder.encodeURIAtributte("?key=" + unsafe, "UTF-8"));
+
+     // in front of any '?' or '#'
+     assertEquals(escaped, HTMLEncoder.encodeURIAtributte(unsafe, "UTF-8"));
+ }
+
+ /**
+  * The character ü is the single octet FC in ISO-8859-1 but the two octets
+  * C3 BC in UTF-8; with UTF-8 requested it must come out as %C3%BC.
+  */
+ public void testWriteNonUsAsciiOnURIAttribute() throws Exception
+ {
+     final String encoded = HTMLEncoder.encodeURIAtributte("ü", "UTF-8");
+     assertEquals("%C3%BC", encoded);
+ }
+
+ /**
+  * Reserved characters (RFC 3986 section 2.2) must not be percent-encoded:
+  * <pre>
+  * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+  * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+  * </pre>
+  * The ampersand is left out of the query sample and only appears in the
+  * sample without a '?'.
+  */
+ public void testReservedCharactersOnURIAttribute() throws Exception
+ {
+     final String inQuery = "?key=:/[]@!$'()*+,;=";
+     assertEquals(inQuery, HTMLEncoder.encodeURIAtributte(inQuery, "UTF-8"));
+
+     final String beforeQuery = ":/[]@!$&'()*+,;=";
+     assertEquals(beforeQuery, HTMLEncoder.encodeURIAtributte(beforeQuery, "UTF-8"));
+ }
+
+ /**
+  * Unreserved characters (RFC 3986 section 2.3: ALPHA, DIGIT, hyphen,
+  * period, underscore and tilde) must never be percent-encoded, whether
+  * they appear in a query, in a fragment or in front of both.
+  */
+ public void testNonEncodedCharactersOnURIAttribute() throws Exception
+ {
+     final String[] samples = {
+         "?key=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~",
+         "#somefile?key=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~",
+         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
+     };
+     for (int i = 0; i < samples.length; i++)
+     {
+         assertEquals(samples[i], HTMLEncoder.encodeURIAtributte(samples[i], "UTF-8"));
+     }
+ }
+
+ /**
+  * Non US-ASCII characters must be percent-encoded as their UTF-8 octets,
+  * both before and after the '?'. The sample covers U+00A1 to U+00B1
+  * (including the invisible soft hyphen U+00AD), U+00FF and U+0100. The
+  * last one lies outside Latin-1 and is still encoded rather than dropped.
+  */
+ public void testWriteURIAttribute() throws Exception
+ {
+     // Build the sample from code points so that the invisible U+00AD cannot
+     // get lost and the test does not depend on the source file encoding.
+     final StringBuilder sample = new StringBuilder();
+     for (char ch = 0xA1; ch <= 0xB1; ch++)
+     {
+         sample.append(ch);
+     }
+     sample.append((char) 0xFF).append((char) 0x100);
+
+     final String expected =
+         "%C2%A1%C2%A2%C2%A3%C2%A4%C2%A5%C2%A6%C2%A7%C2%A8%C2%A9%C2%AA%C2%AB%C2%AC%C2%AD" +
+         "%C2%AE%C2%AF%C2%B0%C2%B1%C3%BF%C4%80";
+
+     String cad11 = sample.toString();
+     String cad13 = HTMLEncoder.encodeURIAtributte(cad11,"UTF-8");
+     assertEquals(expected, cad13);
+
+     String cad1 = "?key=" + sample;
+     String cad3 = HTMLEncoder.encodeURIAtributte(cad1,"UTF-8");
+     assertEquals("?key=" + expected, cad3);
+ }
+
}
\ No newline at end of file