You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@myfaces.apache.org by jw...@apache.org on 2013/02/07 21:39:17 UTC

svn commit: r1443711 - in /myfaces/trinidad/trunk/trinidad-impl/src/main: java/org/apache/myfaces/trinidadinternal/io/ xrts/org/apache/myfaces/trinidadinternal/resource/

Author: jwaldman
Date: Thu Feb  7 20:39:17 2013
New Revision: 1443711

URL: http://svn.apache.org/r1443711
Log:
MYFACES-3690 Trinidad doesn't support surrogate characters

Modified:
    myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java
    myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java
    myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts

Modified: myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java
URL: http://svn.apache.org/viewvc/myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java?rev=1443711&r1=1443710&r2=1443711&view=diff
==============================================================================
--- myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java (original)
+++ myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/HTMLEscapes.java Thu Feb  7 20:39:17 2013
@@ -21,6 +21,8 @@ package org.apache.myfaces.trinidadinter
 import java.io.IOException;
 import java.io.Writer;
 
+import org.apache.myfaces.trinidad.logging.TrinidadLogger;
+
 /**
  * Utility class for escaping HTML text.
  * <p>
@@ -92,7 +94,7 @@ public class HTMLEscapes
     for (int i = start; i < end; i++)
     {
       final char ch = text[i];
-
+      
       if (ch < 0xA0)
       {
         // text is in the US7ASCII range        
@@ -161,6 +163,31 @@ public class HTMLEscapes
         // character is in the high ISO range, so use HTML entity
         buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]);
       }
+      else if (Character.isHighSurrogate(ch)) 
+      {
+        // Some languages need supplementary characters (code points above U+FFFF). In UTF-16, a "surrogate pair"
+        // represents a single supplementary character; the high surrogate range is (ch >= 0xD800 && ch <= 0xDBFF).
+        // Here we check whether the character is a high surrogate (the following char should be a low surrogate)
+        // and, if so, encode the surrogate pair as one numeric reference.
+        // If we don't do this, supplementary characters will not display correctly. E.g., if we encoded each
+        // char separately we might emit '&#55360;&#56320;', and Firefox can't recognize these 2 decimal values as
+        // one supplementary character. Instead we need to emit the single decimal value &#131072;.
+        int surrogateCodePoint = Character.codePointAt(text, i);
+        // only increase i if a valid surrogate code point is returned 
+        if (Character.isSupplementaryCodePoint(surrogateCodePoint))
+        {
+          
+          buffIndex =
+              _writeDecRef(out, buff, buffIndex, surrogateCodePoint);
+          i++;
+        }
+        else
+        {
+          // blow up if invalid utf-16 characters encountered
+          throw new IllegalArgumentException(
+            _LOG.getMessage("INVALID_SURROGATE_CHAR", new Object[] { ch, surrogateCodePoint, i }));
+        }
+      }
       else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode
       {
         // character is outside of the ISO range
@@ -274,6 +301,30 @@ public class HTMLEscapes
         // character is in the high ISO range, so use HTML entity
         buffIndex = _addToBuffer(out, buff, buffIndex, _sISO8859_1_Entities[ch - 0xA0]);
       }
+      else if (Character.isHighSurrogate(ch)) 
+      {
+        // Some languages need supplementary characters (code points above U+FFFF). In UTF-16, a "surrogate pair"
+        // represents a single supplementary character; the high surrogate range is (ch >= 0xD800 && ch <= 0xDBFF).
+        // Here we check whether the character is a high surrogate (the following char should be a low surrogate)
+        // and, if so, encode the surrogate pair as one numeric reference.
+        // If we don't do this, supplementary characters will not display correctly. E.g., if we encoded each
+        // char separately we might emit '&#55360;&#56320;', and Firefox can't recognize these 2 decimal values as
+        // one supplementary character. Instead we need to emit the single decimal value &#131072;.
+        int surrogateCodePoint = Character.codePointAt(text, i);
+        // only increase i if a valid surrogate code point is returned 
+        if (Character.isSupplementaryCodePoint(surrogateCodePoint))
+        {
+          buffIndex =
+              _writeDecRef(out, buff, buffIndex, surrogateCodePoint);
+          i++;
+        }
+        else
+        {
+          // blow up if invalid utf-16 characters encountered
+          throw new IllegalArgumentException(
+            _LOG.getMessage("INVALID_SURROGATE_CHAR", new Object[] { ch, surrogateCodePoint, i }));
+        }
+      }
       else if (ch < 0xfffe) // characters fffe and ffff are considered outside of unicode
       {
         // character is outside of the ISO range
@@ -301,13 +352,15 @@ public class HTMLEscapes
   /**
    * Writes the output as a decimal escape.  This is the same size or smaller than the hex
    * equivalent and works on versions of Netscape before 4.74. See bug #1491321.
+   * ch - the character to write out. For a surrogate pair this is the combined code point rather than a char.
+   * Code points are integers between 0 and 1,114,111 (0x10FFFF).
    * <p>
    */
   static private int _writeDecRef(
     final Writer out,
     final char[] buff,
     int          buffIndex,
-    final char   ch
+    final int   ch
     ) throws IOException
   {
     // Formerly used String.valueOf().  This version tests out
@@ -331,8 +384,39 @@ public class HTMLEscapes
       // we have enough space for the biggest string, so use buffer
       buff[buffIndex++] = '&';
       buff[buffIndex++] = '#';
- 
-      if (i > 10000)
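+      // A code point is an integer between 0 and 1,114,111, so values of up to seven decimal digits must be handled.
+      // Purposely unrolled for speed. NOTE(review): with '>' exactly 1000000 / 100000 take the shorter branch, where the leading digit computes as 10 - confirm whether '>=' was intended.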
+      if (i > 1000000)
+      {      
+        buff[buffIndex++] = (char)('0' + (i / 1000000));
+        i = i % 1000000;       
+        buff[buffIndex++] = (char)('0' + (i / 100000));
+        i = i % 100000;      
+        buff[buffIndex++] = (char)('0' + (i / 10000));
+        i = i % 10000;
+        buff[buffIndex++] = (char)('0' + (i / 1000));
+        i = i % 1000;
+        buff[buffIndex++] = (char)('0' + (i / 100));
+        i = i % 100;
+        buff[buffIndex++] = (char)('0' + (i / 10));
+        i = i % 10;
+        buff[buffIndex++] = (char)('0' + i);
+      }  
+      else if (i > 100000)
+      {      
+        buff[buffIndex++] = (char)('0' + (i / 100000));
+        i = i % 100000;      
+        buff[buffIndex++] = (char)('0' + (i / 10000));
+        i = i % 10000;
+        buff[buffIndex++] = (char)('0' + (i / 1000));
+        i = i % 1000;
+        buff[buffIndex++] = (char)('0' + (i / 100));
+        i = i % 100;
+        buff[buffIndex++] = (char)('0' + (i / 10));
+        i = i % 10;
+        buff[buffIndex++] = (char)('0' + i);
+      }      
+      else if (i > 10000)
       {      
         buff[buffIndex++] = (char)('0' + (i / 10000));
         i = i % 10000;
@@ -576,4 +660,6 @@ public class HTMLEscapes
   // the <wbr> tag. This tag inserts a no-width-space so that the browser may
   // break the line at that point.
   static private final char _UNICODE_HYPHENATION_POINT = 0x2027;
+  
+  private static final TrinidadLogger _LOG = TrinidadLogger.createTrinidadLogger(HTMLEscapes.class);
 }

Modified: myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java
URL: http://svn.apache.org/viewvc/myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java?rev=1443711&r1=1443710&r2=1443711&view=diff
==============================================================================
--- myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java (original)
+++ myfaces/trinidad/trunk/trinidad-impl/src/main/java/org/apache/myfaces/trinidadinternal/io/XMLEscapes.java Thu Feb  7 20:39:17 2013
@@ -169,7 +169,7 @@ public class XMLEscapes
    */
   static void __writeDecRef(
     final Writer out,
-    final char   ch
+    final int   ch
     ) throws IOException
   {
     // Formerly used String.valueOf().  This version tests out
@@ -178,8 +178,39 @@ public class XMLEscapes
     out.write("&#");
 
     int i = ch;
-                
-    if (i > 10000)
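+    // A code point is an integer between 0 and 1,114,111, so values of up to seven decimal digits must be handled.
+    // Purposely unrolled for speed. NOTE(review): with '>' exactly 1000000 / 100000 take the shorter branch, where the leading digit computes as 10 - confirm whether '>=' was intended.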
+    if (i > 1000000)
+    {      
+      out.write('0' + (i / 1000000));
+      i = i % 1000000;       
+      out.write('0' + (i / 100000));
+      i = i % 100000;      
+      out.write('0' + (i / 10000));
+      i = i % 10000;
+      out.write('0' + (i / 1000));
+      i = i % 1000;
+      out.write('0' + (i / 100));
+      i = i % 100;
+      out.write('0' + (i / 10));
+      i = i % 10;
+      out.write('0' + i);
+    }  
+    else if (i > 100000)
+    {      
+      out.write('0' + (i / 100000));
+      i = i % 100000;      
+      out.write('0' + (i / 10000));
+      i = i % 10000;
+      out.write('0' + (i / 1000));
+      i = i % 1000;
+      out.write('0' + (i / 100));
+      i = i % 100;
+      out.write('0' + (i / 10));
+      i = i % 10;
+      out.write('0' + i);
+    }    
+    else if (i > 10000)
     {      
       out.write('0' + (i / 10000));
       i = i % 10000;

Modified: myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts
URL: http://svn.apache.org/viewvc/myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts?rev=1443711&r1=1443710&r2=1443711&view=diff
==============================================================================
--- myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts (original)
+++ myfaces/trinidad/trunk/trinidad-impl/src/main/xrts/org/apache/myfaces/trinidadinternal/resource/LoggerBundle.xrts Thu Feb  7 20:39:17 2013
@@ -1206,4 +1206,6 @@ The skin {0} specified on the requestMap
 
 <resource key="FOR_EACH_META_DATA_KEY_UNAVAILABLE">The for each meta data could not be found. Key in the collection may no longer be available. Ensure there are no stale references to the component.</resource>
 
+<resource key="INVALID_SURROGATE_CHAR">During encoding, a high surrogate character was found, but the codePoint was invalid. ch is {0}, codePoint is {1} and index is {2}.</resource>
+
 </resources>