You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@myfaces.apache.org by jw...@apache.org on 2013/02/07 21:39:17 UTC
svn commit: r1443711 - in /myfaces/trinidad/trunk/trinidad-impl/src/main:
java/org/apache/myfaces/trinidadinternal/io/
xrts/org/apache/myfaces/trinidadinternal/resource/
Author: jwaldman
Date: Thu Feb 7 20:39:17 2013
New Revision: 1443711
URL: http://svn.apache.org/r1443711
Log:
MYFACES-3690 Trinidad doesn't support surrogate characters
Modified:
myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java
myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java
myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts
Modified: myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java
URL: http://svn.apache.org/viewvc/myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java?rev=1443711&r1=1443710&r2=1443711&view=diff
==============================================================================
--- myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java (original)
+++ myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java Thu Feb 7 20:39:17 2013
@@ -21,6 +21,8 @@ package org.apache.myfaces.trinidadinter
import java.io.IOException;
import java.io.Writer;
+import org.apache.myfaces.trinidad.logging.TrinidadLogger;
+
/**
* Utility class for escaping HTML text.
* <p>
@@ -92,7 +94,7 @@ public class HTMLEscapes
for (int i = start; i < end; i++)
{
final char ch = text[i];
-
+
if (ch < 0xA0)
{
// text is in the US7ASCII range
@@ -161,6 +163,31 @@ public class HTMLEscapes
// character is in the high ISO range, so use HTML entity
buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]);
}
+ else if (Character.isHighSurrogate(ch))
+ {
+ // Some languages' characters need to use supplementary characters. For UTF-16, a "surrogate pair" is required
+ // to represent a single supplementary character. high surrogate range is (ch >= 0xD800 && ch <= 0xDBFF)
+ // Here we check if the character is a high surrogate (the following char will be a low surrogate).
+ // We then encode the surrogate pair.
+ // If we don't do this, then surrogate characters will not display correctly. E.g., if we encoded each
+ // character separately we might have '&#55360;&#56320;', and Firefox can't recognize these 2 decimal values as
+ // one supplementary character. Instead we need to use a single supplementary decimal value &#131072;.
+ int surrogateCodePoint = Character.codePointAt(text, i);
+ // only increase i if a valid surrogate code point is returned
+ if (Character.isSupplementaryCodePoint(surrogateCodePoint))
+ {
+
+ buffIndex =
+ _writeDecRef(out, buff, buffIndex, surrogateCodePoint);
+ i++;
+ }
+ else
+ {
+ // blow up if invalid utf-16 characters encountered
+ throw new IllegalArgumentException(
+ _LOG.getMessage("INVALID_SURROGATE_CHAR", new Object[] { ch, surrogateCodePoint, i }));
+ }
+ }
else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode
{
// character is outside of the ISO range
@@ -274,6 +301,30 @@ public class HTMLEscapes
// character is in the high ISO range, so use HTML entity
buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]);
}
+ else if (Character.isHighSurrogate(ch))
+ {
+ // Some languages' characters need to use supplementary characters. For UTF-16, a "surrogate pair" is required
+ // to represent a single supplementary character. high surrogate range is (ch >= 0xD800 && ch <= 0xDBFF)
+ // Here we check if the character is a high surrogate (the following char will be a low surrogate).
+ // We then encode the surrogate pair.
+ // If we don't do this, then surrogate characters will not display correctly. E.g., if we encoded each
+ // character separately we might have '&#55360;&#56320;', and Firefox can't recognize these 2 decimal values as
+ // one supplementary character. Instead we need to use a single supplementary decimal value &#131072;.
+ int surrogateCodePoint = Character.codePointAt(text, i);
+ // only increase i if a valid surrogate code point is returned
+ if (Character.isSupplementaryCodePoint(surrogateCodePoint))
+ {
+ buffIndex =
+ _writeDecRef(out, buff, buffIndex, surrogateCodePoint);
+ i++;
+ }
+ else
+ {
+ // blow up if invalid utf-16 characters encountered
+ throw new IllegalArgumentException(
+ _LOG.getMessage("INVALID_SURROGATE_CHAR", new Object[] { ch, surrogateCodePoint, i }));
+ }
+ }
else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode
{
// character is outside of the ISO range
@@ -301,13 +352,15 @@ public class HTMLEscapes
/**
* Writes the output as a decimal escape. This is the same size or smaller than the hex
* equivalent and works on versions of Netscape before 4.74. See bug #1491321.
+ * ch - the character to write out. This could be a character code point in the case of surrogate characters.
+ * Character code points are integers between 0 and 1,114,111.
* <p>
*/
static private int _writeDecRef(
final Writer out,
final char[] buff,
int buffIndex,
- final char ch
+ final int ch
) throws IOException
{
// Formerly used String.valueOf(). This version tests out
@@ -331,8 +384,39 @@ public class HTMLEscapes
// we have enough space for the biggest string, so use buffer
buff[buffIndex++] = '&';
buff[buffIndex++] = '#';
-
- if (i > 10000)
+ // A code point is an integer between 0 and 1,114,111, so we need to check if i is above 1000000
+ // purposely doing loop unrolling below for speed
+ if (i > 1000000)
+ {
+ buff[buffIndex++] = (char)('0' + (i / 1000000));
+ i = i % 1000000;
+ buff[buffIndex++] = (char)('0' + (i / 100000));
+ i = i % 100000;
+ buff[buffIndex++] = (char)('0' + (i / 10000));
+ i = i % 10000;
+ buff[buffIndex++] = (char)('0' + (i / 1000));
+ i = i % 1000;
+ buff[buffIndex++] = (char)('0' + (i / 100));
+ i = i % 100;
+ buff[buffIndex++] = (char)('0' + (i / 10));
+ i = i % 10;
+ buff[buffIndex++] = (char)('0' + i);
+ }
+ else if (i > 100000)
+ {
+ buff[buffIndex++] = (char)('0' + (i / 100000));
+ i = i % 100000;
+ buff[buffIndex++] = (char)('0' + (i / 10000));
+ i = i % 10000;
+ buff[buffIndex++] = (char)('0' + (i / 1000));
+ i = i % 1000;
+ buff[buffIndex++] = (char)('0' + (i / 100));
+ i = i % 100;
+ buff[buffIndex++] = (char)('0' + (i / 10));
+ i = i % 10;
+ buff[buffIndex++] = (char)('0' + i);
+ }
+ else if (i > 10000)
{
buff[buffIndex++] = (char)('0' + (i / 10000));
i = i % 10000;
@@ -576,4 +660,6 @@ public class HTMLEscapes
// the <wbr> tag. This tag inserts a no-width-space so that the browser may
// break the line at that point.
static private final char _UNICODE_HYPHENATION_POINT = 0x2027;
+
+ private static final TrinidadLogger _LOG = TrinidadLogger.createTrinidadLogger(HTMLEscapes.class);
}
Modified: myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java
URL: http://svn.apache.org/viewvc/myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java?rev=1443711&r1=1443710&r2=1443711&view=diff
==============================================================================
--- myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java (original)
+++ myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java Thu Feb 7 20:39:17 2013
@@ -169,7 +169,7 @@ public class XMLEscapes
*/
static void __writeDecRef(
final Writer out,
- final char ch
+ final int ch
) throws IOException
{
// Formerly used String.valueOf(). This version tests out
@@ -178,8 +178,39 @@ public class XMLEscapes
out.write("&#");
int i = ch;
-
- if (i > 10000)
+ // A code point is an integer between 0 and 1,114,111, so we need to check if i is above 1000000
+ // purposely doing loop unrolling below for speed
+ if (i > 1000000)
+ {
+ out.write('0' + (i / 1000000));
+ i = i % 1000000;
+ out.write('0' + (i / 100000));
+ i = i % 100000;
+ out.write('0' + (i / 10000));
+ i = i % 10000;
+ out.write('0' + (i / 1000));
+ i = i % 1000;
+ out.write('0' + (i / 100));
+ i = i % 100;
+ out.write('0' + (i / 10));
+ i = i % 10;
+ out.write('0' + i);
+ }
+ else if (i > 100000)
+ {
+ out.write('0' + (i / 100000));
+ i = i % 100000;
+ out.write('0' + (i / 10000));
+ i = i % 10000;
+ out.write('0' + (i / 1000));
+ i = i % 1000;
+ out.write('0' + (i / 100));
+ i = i % 100;
+ out.write('0' + (i / 10));
+ i = i % 10;
+ out.write('0' + i);
+ }
+ else if (i > 10000)
{
out.write('0' + (i / 10000));
i = i % 10000;
Modified: myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts
URL: http://svn.apache.org/viewvc/myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts?rev=1443711&r1=1443710&r2=1443711&view=diff
==============================================================================
--- myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts (original)
+++ myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts Thu Feb 7 20:39:17 2013
@@ -1206,4 +1206,6 @@ The skin {0} specified on the requestMap
<resource key="FOR_EACH_META_DATA_KEY_UNAVAILABLE">The for each meta data could not be found. Key in the collection may no longer be available. Ensure there are no stale references to the component.</resource>
+<resource key="INVALID_SURROGATE_CHAR">During encoding, a high surrogate character was found, but the codePoint was invalid. ch is {0}, codePoint is {1} and index is {2}.</resource>
+
</resources>