You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by mi...@apache.org on 2003/10/05 16:01:58 UTC
cvs commit: xml-xalan/java/src/org/apache/xml/serializer ToStream.java CharInfo.java ToHTMLStream.java
minchau 2003/10/05 07:01:57
Modified: java/src/org/apache/xml/serializer Tag: xslt20-compiled
ToStream.java CharInfo.java ToHTMLStream.java
Log:
Fix for Bugzilla 22623 (tab in attribute).
Revision Changes Path
No revision
No revision
1.21.2.2 +34 -23 xml-xalan/java/src/org/apache/xml/serializer/ToStream.java
Index: ToStream.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/ToStream.java,v
retrieving revision 1.21.2.1
retrieving revision 1.21.2.2
diff -u -r1.21.2.1 -r1.21.2.2
--- ToStream.java 26 Sep 2003 19:53:17 -0000 1.21.2.1
+++ ToStream.java 5 Oct 2003 14:01:57 -0000 1.21.2.2
@@ -1061,6 +1061,8 @@
* @param i index into character array.
* @param chars non-null reference to character array.
* @param len length of chars.
+ * @param fromTextNode true if the characters being processed
+ * are from a text node, false if they are from an attribute value
* @param escLF true if the linefeed should be escaped.
*
* @return i+1 if the character was written, else i.
@@ -1073,6 +1075,7 @@
int i,
char[] chars,
int len,
+ boolean fromTextNode,
boolean escLF)
throws IOException
{
@@ -1083,7 +1086,9 @@
}
else
{
- if (m_charInfo.isSpecial(ch))
+ // if this is text node character and a special one of those,
+ // or if this is a character from attribute value and a special one of those
+ if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
{
String entityRef = m_charInfo.getEntityNameForChar(ch);
@@ -1474,9 +1479,9 @@
* processing for dirty characters here as for non-whitespace.
*
*/
- if (!m_charInfo.isASCIIClean(ch1))
+ if (!m_charInfo.isTextASCIIClean(ch1))
{
- lastDirty = processDirty(chars,end, i,ch1, lastDirty);
+ lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
i = lastDirty;
}
}
@@ -1500,7 +1505,7 @@
char ch2;
while (i<end
&& ((ch2 = chars[i])<127)
- && m_charInfo.isASCIIClean(ch2))
+ && m_charInfo.isTextASCIIClean(ch2))
i++;
if (i == end)
break;
@@ -1509,14 +1514,14 @@
final char ch = chars[i];
if (
- (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
+ (escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
|| ('"' == ch))
{
; // a character needing no special processing
}
else
{
- lastDirty = processDirty(chars,end, i, ch, lastDirty);
+ lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
i = lastDirty;
}
}
@@ -1551,6 +1556,8 @@
* @param i the index of the dirty character
* @param ch the character in chars[i]
* @param lastDirty the last dirty character previous to i
+ * @param fromTextNode true if the characters being processed are
+ * from a text node, false if they are from an attribute value.
* @return the index of the last character processed
*/
private int processDirty(
@@ -1558,7 +1565,8 @@
int end,
int i,
char ch,
- int lastDirty) throws IOException
+ int lastDirty,
+ boolean fromTextNode) throws IOException
{
int startClean = lastDirty + 1;
// if we have some clean characters accumulated
@@ -1570,7 +1578,7 @@
}
// process the "dirty" character
- if (CharInfo.S_LINEFEED == ch)
+ if (CharInfo.S_LINEFEED == ch && fromTextNode)
{
m_writer.write(m_lineSep, 0, m_lineSepLen);
}
@@ -1583,6 +1591,7 @@
i,
chars,
end,
+ fromTextNode,
false);
i = startClean - 1;
}
@@ -1616,6 +1625,9 @@
* @param i index into character array.
* @param chars non-null reference to character array.
* @param len length of chars.
+ * @param fromTextNode true if the characters being processed are
+ * from a text node, false if the characters being processed are from
+ * an attribute value.
* @param escLF true if the linefeed should be escaped.
*
* @return i+1 if a character was written, i+2 if two characters
@@ -1629,11 +1641,12 @@
int i,
char[] chars,
int len,
+ boolean fromTextNode,
boolean escLF)
throws IOException
{
- int pos = accumDefaultEntity(writer, ch, i, chars, len, escLF);
+ int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
if (i == pos)
{
@@ -1676,14 +1689,12 @@
writer.write(Integer.toString(next));
writer.write(';');
pos += 2; // count the two characters that went into writing out this entity
- /*} else if (null != ctbc && !ctbc.canConvert(ch)) {
- sb.append("&#x");
- sb.append(Integer.toString((int)ch, 16));
- sb.append(";");*/
}
else
{
- if (!escapingNotNeeded(ch) || (m_charInfo.isSpecial(ch)))
+ if (!escapingNotNeeded(ch) ||
+ ( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
+ || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
{
writer.write("&#");
writer.write(Integer.toString(ch));
@@ -1948,21 +1959,21 @@
for (int i = 0; i < len; i++)
{
char ch = stringChars[i];
- if (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
+ if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
{
writer.write(ch);
}
else
{ // I guess the parser doesn't normalize cr/lf in attributes. -sb
- if ((CharInfo.S_CARRIAGERETURN == ch)
- && ((i + 1) < len)
- && (CharInfo.S_LINEFEED == stringChars[i + 1]))
- {
- i++;
- ch = CharInfo.S_LINEFEED;
- }
+// if ((CharInfo.S_CARRIAGERETURN == ch)
+// && ((i + 1) < len)
+// && (CharInfo.S_LINEFEED == stringChars[i + 1]))
+// {
+// i++;
+// ch = CharInfo.S_LINEFEED;
+// }
- accumDefaultEscape(writer, ch, i, stringChars, len, true);
+ accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
}
}
1.6.2.1 +77 -27 xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java
Index: CharInfo.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/CharInfo.java,v
retrieving revision 1.6
retrieving revision 1.6.2.1
diff -u -r1.6 -r1.6.2.1
--- CharInfo.java 14 Aug 2003 16:27:42 -0000 1.6
+++ CharInfo.java 5 Oct 2003 14:01:57 -0000 1.6.2.1
@@ -100,6 +100,9 @@
*/
public static String XML_ENTITIES_RESOURCE = "org.apache.xml.serializer.XMLEntities";
+ /** The horizontal tab character, which the parser should always normalize. */
+ public static final char S_HORIZONAL_TAB = 0x09;
+
/** The linefeed character, which the parser should always normalize. */
public static final char S_LINEFEED = 0x0A;
@@ -116,14 +119,21 @@
/** Copy the first 0,1 ... ASCII_MAX values into an array */
private static final int ASCII_MAX = 128;
- /** Array of values is faster access than a set of bits */
- private boolean[] quickASCII = new boolean[ASCII_MAX];
+ /** Array of values is faster access than a set of bits
+ * to quickly check ASCII characters in attribute values.
+ */
+ private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX];
+
+ /** Array of values is faster access than a set of bits
+ * to quickly check ASCII characters in text nodes.
+ */
+ private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX];
- private boolean[] isCleanASCII = new boolean[ASCII_MAX];
+ private boolean[] isCleanTextASCII = new boolean[ASCII_MAX];
/** An array of bits to record if the character is in the set.
* Although information in this array is complete, the
- * quickASCII array is used first because access to its values
+ * isSpecialAttrASCII array is used first because access to its values
* is common and faster.
*/
private int array_of_bits[] = createEmptySetOfIntegers(65535);
@@ -326,24 +336,40 @@
}
}
}
-
- onlyQuotAmpLtGt = noExtraEntities;
-
- // initialize the array with a cache of the BitSet values
- for (int i=0; i<ASCII_MAX; i++)
- quickASCII[i] = get(i);
- // initialize the array with a cache of values
- // for use by ToStream.character(char[], int , int)
+ /* initialize the array isCleanTextASCII[] with a cache of values
+ * for use by ToStream.character(char[], int , int)
+ * and the array isSpecialTextASCII[] with the opposite values
+ * (all in the name of performance!)
+ */
for (int ch = 0; ch <ASCII_MAX; ch++)
if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch)))
&& (!get(ch))) || ('"' == ch))
{
- isCleanASCII[ch] = true;
+ isCleanTextASCII[ch] = true;
+ isSpecialTextASCII[ch] = false;
}
else {
- isCleanASCII[ch] = false;
- }
+ isCleanTextASCII[ch] = false;
+ isSpecialTextASCII[ch] = true;
+ }
+
+ /* Now that we've used get(ch) just above to initialize the
+ * two arrays we will change by adding a tab to the set of
+ * special chars. We do this because a tab is always a
+ * special character in an attribute, but only a special character
+ * in text if it has an entity defined for it.
+ * This is the reason for this delay.
+ */
+ set(S_HORIZONAL_TAB);
+
+
+ onlyQuotAmpLtGt = noExtraEntities;
+
+ // initialize the array with a cache of the BitSet values
+ for (int i=0; i<ASCII_MAX; i++)
+ isSpecialAttrASCII[i] = get(i);
+
}
/**
@@ -388,22 +414,45 @@
m_charKey.setChar(value);
return (String) m_charToEntityRef.get(m_charKey);
}
+
+ /**
+ * Tell if the character argument that is from
+ * an attribute value should have special treatment.
+ *
+ * @param value the value of a character that is in an attribute value
+ * @return true if the character should have any special treatment,
+ * such as when writing out attribute values,
+ * or entity references.
+ */
+ public final boolean isSpecialAttrChar(int value)
+ {
+ // for performance try the values in the boolean array first,
+ // this is faster access than the BitSet for common ASCII values
+
+ if (value < ASCII_MAX)
+ return isSpecialAttrASCII[value];
+
+ // rather than java.util.BitSet, our private
+ // implementation is faster (and less general).
+ return get(value);
+ }
/**
- * Tell if the character argument should have special treatment.
- *
- * @param value character value.
- *
- * @return true if the character should have any special treatment, such as
- * when writing out attribute values, or entity references.
+ * Tell if the character argument that is from a
+ * text node should have special treatment.
+ *
+ * @param value the value of a character that is in a text node
+ * @return true if the character should have any special treatment,
+ * such as when writing out attribute values,
+ * or entity references.
*/
- public final boolean isSpecial(int value)
+ public final boolean isSpecialTextChar(int value)
{
// for performance try the values in the boolean array first,
// this is faster access than the BitSet for common ASCII values
if (value < ASCII_MAX)
- return quickASCII[value];
+ return isSpecialTextASCII[value];
// rather than java.util.BitSet, our private
// implementation is faster (and less general).
@@ -411,13 +460,14 @@
}
/**
- * This method is used to determine if an ASCII character is "clean"
+ * This method is used to determine if an ASCII character in
+ * a text node (not an attribute value) is "clean".
* @param value the character to check (0 to 127).
* @return true if the character can go to the writer as-is
*/
- public final boolean isASCIIClean(int value)
+ public final boolean isTextASCIIClean(int value)
{
- return isCleanASCII[value];
+ return isCleanTextASCII[value];
}
// In the future one might want to use the array directly and avoid
@@ -425,7 +475,7 @@
// so don't do it (for now) - bjm
// public final boolean[] getASCIIClean()
// {
-// return isCleanASCII;
+// return isCleanTextASCII;
// }
1.23.2.2 +2 -2 xml-xalan/java/src/org/apache/xml/serializer/ToHTMLStream.java
Index: ToHTMLStream.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/ToHTMLStream.java,v
retrieving revision 1.23.2.1
retrieving revision 1.23.2.2
diff -u -r1.23.2.1 -r1.23.2.2
--- ToHTMLStream.java 26 Sep 2003 19:53:17 -0000 1.23.2.1
+++ ToHTMLStream.java 5 Oct 2003 14:01:57 -0000 1.23.2.2
@@ -1305,7 +1305,7 @@
// System.out.println("ch: "+(int)ch);
// System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
// System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
- if (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
+ if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
{
cleanLength++;
}
@@ -1325,7 +1325,7 @@
writer.write(chars,cleanStart,cleanLength);
cleanLength = 0;
}
- int pos = accumDefaultEntity(writer, ch, i, chars, end, false);
+ int pos = accumDefaultEntity(writer, ch, i, chars, end, false, false);
if (i != pos)
{
---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org