You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by mi...@apache.org on 2003/10/05 16:01:58 UTC

cvs commit: xml-xalan/java/src/org/apache/xml/serializer ToStream.java CharInfo.java ToHTMLStream.java

minchau     2003/10/05 07:01:57

  Modified:    java/src/org/apache/xml/serializer Tag: xslt20-compiled
                        ToStream.java CharInfo.java ToHTMLStream.java
  Log:
  Fix for Bugzilla 22623 (tab in attribute).
  
  Revision  Changes    Path
  No                   revision
  No                   revision
  1.21.2.2  +34 -23    xml-xalan/java/src/org/apache/xml/serializer/ToStream.java
  
  Index: ToStream.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/ToStream.java,v
  retrieving revision 1.21.2.1
  retrieving revision 1.21.2.2
  diff -u -r1.21.2.1 -r1.21.2.2
  --- ToStream.java	26 Sep 2003 19:53:17 -0000	1.21.2.1
  +++ ToStream.java	5 Oct 2003 14:01:57 -0000	1.21.2.2
  @@ -1061,6 +1061,8 @@
        * @param i index into character array.
        * @param chars non-null reference to character array.
        * @param len length of chars.
  +     * @param fromTextNode true if the characters being processed
  +     * are from a text node, false if they are from an attribute value
        * @param escLF true if the linefeed should be escaped.
        *
        * @return i+1 if the character was written, else i.
  @@ -1073,6 +1075,7 @@
           int i,
           char[] chars,
           int len,
  +        boolean fromTextNode,
           boolean escLF)
           throws IOException
       {
  @@ -1083,7 +1086,9 @@
           }
           else
           {
  -            if (m_charInfo.isSpecial(ch))
  +            // if this is text node character and a special one of those,
  +            // or if this is a character from attribute value and a special one of those
  +            if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
               {
                   String entityRef = m_charInfo.getEntityNameForChar(ch);
   
  @@ -1474,9 +1479,9 @@
                    * processing for dirty characters here as for non-whitespace.
                    * 
                    */
  -                if (!m_charInfo.isASCIIClean(ch1))
  +                if (!m_charInfo.isTextASCIIClean(ch1))
                   {
  -                    lastDirty = processDirty(chars,end, i,ch1, lastDirty);
  +                    lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
                       i = lastDirty;
                   }
               }
  @@ -1500,7 +1505,7 @@
                       char ch2;
                       while (i<end 
                               && ((ch2 = chars[i])<127)
  -                            && m_charInfo.isASCIIClean(ch2))
  +                            && m_charInfo.isTextASCIIClean(ch2))
                               i++;
                       if (i == end)
                           break;
  @@ -1509,14 +1514,14 @@
                   final char ch = chars[i];
                   if (
                   
  -                    (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
  +                    (escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
                           || ('"' == ch))
                   {
                       ; // a character needing no special processing
                   }
                   else
                   {
  -                    lastDirty = processDirty(chars,end, i, ch, lastDirty);
  +                    lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
                       i = lastDirty;
                   }
               }
  @@ -1551,6 +1556,8 @@
        * @param i the index of the dirty character
        * @param ch the character in chars[i]
        * @param lastDirty the last dirty character previous to i
  +     * @param fromTextNode true if the characters being processed are
  +     * from a text node, false if they are from an attribute value.
        * @return the index of the last character processed
        */
       private int processDirty(
  @@ -1558,7 +1565,8 @@
           int end,
           int i, 
           char ch,
  -        int lastDirty) throws IOException
  +        int lastDirty,
  +        boolean fromTextNode) throws IOException
       {
           int startClean = lastDirty + 1;
           // if we have some clean characters accumulated
  @@ -1570,7 +1578,7 @@
           }
   
           // process the "dirty" character
  -        if (CharInfo.S_LINEFEED == ch)
  +        if (CharInfo.S_LINEFEED == ch && fromTextNode)
           {
               m_writer.write(m_lineSep, 0, m_lineSepLen);
           }
  @@ -1583,6 +1591,7 @@
                       i,
                       chars,
                       end,
  +                    fromTextNode,
                       false);
               i = startClean - 1;
           }
  @@ -1616,6 +1625,9 @@
        * @param i index into character array.
        * @param chars non-null reference to character array.
        * @param len length of chars.
  +     * @param fromTextNode true if the characters being processed are
  +     * from a text node, false if the characters being processed are from
  +     * an attribute value.
        * @param escLF true if the linefeed should be escaped.
        *
        * @return i+1 if a character was written, i+2 if two characters
  @@ -1629,11 +1641,12 @@
           int i,
           char[] chars,
           int len,
  +        boolean fromTextNode,
           boolean escLF)
           throws IOException
       {
   
  -        int pos = accumDefaultEntity(writer, ch, i, chars, len, escLF);
  +        int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
   
           if (i == pos)
           {
  @@ -1676,14 +1689,12 @@
                   writer.write(Integer.toString(next));
                   writer.write(';');
                   pos += 2; // count the two characters that went into writing out this entity
  -                /*} else if (null != ctbc && !ctbc.canConvert(ch)) {
  -                sb.append("&#x");
  -                sb.append(Integer.toString((int)ch, 16));
  -                sb.append(";");*/
               }
               else
               {
  -                if (!escapingNotNeeded(ch) || (m_charInfo.isSpecial(ch)))
  +                if (!escapingNotNeeded(ch) || 
  +                    (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
  +                     || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
                   {
                       writer.write("&#");
                       writer.write(Integer.toString(ch));
  @@ -1948,21 +1959,21 @@
           for (int i = 0; i < len; i++)
           {
               char ch = stringChars[i];
  -            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
  +            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
               {
                   writer.write(ch);
               }
               else
               { // I guess the parser doesn't normalize cr/lf in attributes. -sb
  -                if ((CharInfo.S_CARRIAGERETURN == ch)
  -                    && ((i + 1) < len)
  -                    && (CharInfo.S_LINEFEED == stringChars[i + 1]))
  -                {
  -                    i++;
  -                    ch = CharInfo.S_LINEFEED;
  -                }
  +//                if ((CharInfo.S_CARRIAGERETURN == ch)
  +//                    && ((i + 1) < len)
  +//                    && (CharInfo.S_LINEFEED == stringChars[i + 1]))
  +//                {
  +//                    i++;
  +//                    ch = CharInfo.S_LINEFEED;
  +//                }
   
  -                accumDefaultEscape(writer, ch, i, stringChars, len, true);
  +                accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
               }
           }
   
  
  
  
  1.6.2.1   +77 -27    xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java
  
  Index: CharInfo.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java,v
  retrieving revision 1.6
  retrieving revision 1.6.2.1
  diff -u -r1.6 -r1.6.2.1
  --- CharInfo.java	14 Aug 2003 16:27:42 -0000	1.6
  +++ CharInfo.java	5 Oct 2003 14:01:57 -0000	1.6.2.1
  @@ -100,6 +100,9 @@
        */
       public static String XML_ENTITIES_RESOURCE = "org.apache.xml.serializer.XMLEntities";
   
  +    /** The horizontal tab character, which the parser should always normalize. */
  +    public static final char S_HORIZONAL_TAB = 0x09;
  +
       /** The linefeed character, which the parser should always normalize. */
       public static final char S_LINEFEED = 0x0A;
   
  @@ -116,14 +119,21 @@
       /** Copy the first 0,1 ... ASCII_MAX values into an array */
       private static final int ASCII_MAX = 128;
       
  -    /** Array of values is faster access than a set of bits */
  -    private boolean[] quickASCII = new boolean[ASCII_MAX];
  +    /** Array of values is faster access than a set of bits 
  +     * to quickly check ASCII characters in attribute values. 
  +     */
  +    private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX];
  +    
  +    /** Array of values is faster access than a set of bits 
  +     * to quickly check ASCII characters in text nodes. 
  +     */
  +    private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX];
   
  -    private boolean[] isCleanASCII = new boolean[ASCII_MAX];
  +    private boolean[] isCleanTextASCII = new boolean[ASCII_MAX];
   
       /** An array of bits to record if the character is in the set.
        * Although information in this array is complete, the
  -     * quickASCII array is used first because access to its values
  +     * isSpecialAttrASCII array is used first because access to its values
        * is common and faster.
        */   
       private int array_of_bits[] = createEmptySetOfIntegers(65535);
  @@ -326,24 +336,40 @@
                   }
               }
           }
  -
  -        onlyQuotAmpLtGt = noExtraEntities;
  -
  -        // initialize the array with a cache of the BitSet values
  -        for (int i=0; i<ASCII_MAX; i++)
  -            quickASCII[i] = get(i);    
             
  -        // initialize the array with a cache of values
  -        // for use by ToStream.character(char[], int , int)
  +        /* initialize the array isCleanTextASCII[] with a cache of values
  +         * for use by ToStream.character(char[], int , int)
  +         * and the array isSpecialTextASCII[] with the opposite values
  +         * (all in the name of performance!)
  +         */
           for (int ch = 0; ch <ASCII_MAX; ch++)
           if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch)))
                && (!get(ch))) || ('"' == ch))
           {
  -            isCleanASCII[ch] = true;
  +            isCleanTextASCII[ch] = true;
  +            isSpecialTextASCII[ch] = false;
           }
           else {
  -            isCleanASCII[ch] = false;     
  -        }
  +            isCleanTextASCII[ch] = false;
  +            isSpecialTextASCII[ch] = true;     
  +        }       
  +        
  +        /* Now that we've used get(ch) just above to initialize the
  +         * two arrays we will change by adding a tab to the set of 
  +         * special chars.  We do this because a tab is always a
  +         * special character in an attribute, but only a special character
  +         * in text if it has an entity defined for it.
  +         * This is the reason for this delay.
  +         */ 
  +        set(S_HORIZONAL_TAB);
  +        
  +
  +        onlyQuotAmpLtGt = noExtraEntities;
  +
  +        // initialize the array with a cache of the BitSet values
  +        for (int i=0; i<ASCII_MAX; i++)
  +            isSpecialAttrASCII[i] = get(i);    
  +
       }
   
       /**
  @@ -388,22 +414,45 @@
           m_charKey.setChar(value);
           return (String) m_charToEntityRef.get(m_charKey);
       }
  +    
  +    /**
  +     * Tell if the character argument that is from
  +     * an attribute value should have special treatment.
  +     * 
  +     * @param value the value of a character that is in an attribute value
  +     * @return true if the character should have any special treatment, 
  +     * such as when writing out attribute values, 
  +     * or entity references.
  +     */
  +    public final boolean isSpecialAttrChar(int value)
  +    {
  +        // for performance try the values in the boolean array first,
  +        // this is faster access than the BitSet for common ASCII values
  +
  +        if (value < ASCII_MAX)
  +            return isSpecialAttrASCII[value];
  +
  +        // rather than java.util.BitSet, our private
  +        // implementation is faster (and less general).
  +        return get(value);
  +    }    
   
       /**
  -     * Tell if the character argument should have special treatment.
  -     *
  -     * @param value character value.
  -     *
  -     * @return true if the character should have any special treatment, such as
  -     * when writing out attribute values, or entity references.
  +     * Tell if the character argument that is from a 
  +     * text node should have special treatment.
  +     * 
  +     * @param value the value of a character that is in a text node
  +     * @return true if the character should have any special treatment, 
  +     * such as when writing out attribute values, 
  +     * or entity references.
        */
  -    public final boolean isSpecial(int value)
  +    public final boolean isSpecialTextChar(int value)
       {
           // for performance try the values in the boolean array first,
           // this is faster access than the BitSet for common ASCII values
   
           if (value < ASCII_MAX)
  -            return quickASCII[value];
  +            return isSpecialTextASCII[value];
   
           // rather than java.util.BitSet, our private
           // implementation is faster (and less general).
  @@ -411,13 +460,14 @@
       }
       
       /**
  -     * This method is used to determine if an ASCII character is "clean"
  +     * This method is used to determine if an ASCII character in
  +     * a text node (not an attribute value) is "clean".
        * @param value the character to check (0 to 127).
        * @return true if the character can go to the writer as-is
        */
  -    public final boolean isASCIIClean(int value)
  +    public final boolean isTextASCIIClean(int value)
       {
  -        return isCleanASCII[value];
  +        return isCleanTextASCII[value];
       }
       
   //  In the future one might want to use the array directly and avoid
  @@ -425,7 +475,7 @@
   //  so don't do it (for now) - bjm    
   //    public final boolean[] getASCIIClean()
   //    {
  -//        return isCleanASCII;
  +//        return isCleanTextASCII;
   //    }
   
   
  
  
  
  1.23.2.2  +2 -2      xml-xalan/java/src/org/apache/xml/serializer/ToHTMLStream.java
  
  Index: ToHTMLStream.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/ToHTMLStream.java,v
  retrieving revision 1.23.2.1
  retrieving revision 1.23.2.2
  diff -u -r1.23.2.1 -r1.23.2.2
  --- ToHTMLStream.java	26 Sep 2003 19:53:17 -0000	1.23.2.1
  +++ ToHTMLStream.java	5 Oct 2003 14:01:57 -0000	1.23.2.2
  @@ -1305,7 +1305,7 @@
               // System.out.println("ch: "+(int)ch);
               // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
               // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
  -            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
  +            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
               {
                   cleanLength++;
               }
  @@ -1325,7 +1325,7 @@
                       writer.write(chars,cleanStart,cleanLength);
                       cleanLength = 0;
                   }
  -                int pos = accumDefaultEntity(writer, ch, i, chars, end, false);
  +                int pos = accumDefaultEntity(writer, ch, i, chars, end, false, false);
   
                   if (i != pos)
                   {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org