You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ve...@apache.org on 2003/12/09 12:08:23 UTC
cvs commit: xml-xerces/java/src/org/apache/xml/serialize BaseMarkupSerializer.java HTMLSerializer.java IndentPrinter.java XML11Serializer.java XMLSerializer.java
venu 2003/12/09 03:08:23
Modified: java/src/org/apache/xml/serialize Tag: jaxp13-2_6-branch
BaseMarkupSerializer.java HTMLSerializer.java
IndentPrinter.java XML11Serializer.java
XMLSerializer.java
Log:
Fix for bug 22472. Thanks to Kohsuke.Kawaguchi@Sun.COM.
Revision Changes Path
No revision
No revision
1.48.2.1 +39 -16 xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java
Index: BaseMarkupSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v
retrieving revision 1.48
retrieving revision 1.48.2.1
diff -u -r1.48 -r1.48.2.1
--- BaseMarkupSerializer.java 18 Nov 2003 22:59:50 -0000 1.48
+++ BaseMarkupSerializer.java 9 Dec 2003 11:08:23 -0000 1.48.2.1
@@ -511,14 +511,27 @@
// Nothing to do here. All the magic happens in startDocument(String)
}
+ /**
+ * Checks if the specified character range consists entirely of whitespace.
+ */
+ protected final boolean isIgnorable( char[] chars, int start, int length ) {
+ for( int i=start; i<start+length; i++ ) {
+ char ch = chars[i];
+ if(ch!=' ' && ch!='\t' && ch!='\r' && ch!='\n')
+ return false;
+ }
+ return true;
+ }
public void characters( char[] chars, int start, int length )
throws SAXException
{
ElementState state;
+
+ boolean ignorable = isIgnorable(chars,start,length);
try {
- state = content();
+ state = content(ignorable);
// Check if text should be print as CDATA section or unescaped
// based on elements listed in the output format (the element
@@ -582,7 +595,11 @@
printText( chars, start, length, true, state.unescaped );
_printer.setNextIndent( saveIndent );
} else {
- printText( chars, start, length, false, state.unescaped );
+ // if the string is whitespace only and we are indenting
+ // these characters are probably just for indentation.
+ // we will handle indentation by ourselves, so don't print it.
+ if(!ignorable || !_indenting)
+ printText( chars, start, length, false, state.unescaped );
}
}
} catch ( IOException except ) {
@@ -597,7 +614,7 @@
int i;
try {
- content();
+ content(true);
// Print ignorable whitespaces only when indenting, after
// all they are indentation. Cancel the indentation to
@@ -629,7 +646,7 @@
int index;
ElementState state;
- state = content();
+ state = content(false);
// Create the processing instruction textual representation.
// Make sure we don't have '?>' inside either target or code.
@@ -686,7 +703,7 @@
if ( _format.getOmitComments() )
return;
- state = content();
+ state = content(false);
// Create the processing comment textual representation.
// Make sure we don't have '-->' inside the comment.
index = text.indexOf( "-->" );
@@ -825,7 +842,7 @@
{
try {
endCDATA();
- content();
+ content(false);
_printer.printText( '&' );
_printer.printText( name );
_printer.printText( ';' );
@@ -1121,7 +1138,7 @@
Node child;
endCDATA();
- content();
+ content(false);
if (fDOMFilter !=null &&
(fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) {
@@ -1283,25 +1300,27 @@
break;
}
}
-
-
+
/**
* Must be called by a method about to print any type of content.
* If the element was just opened, the opening tag is closed and
* will be matched to a closing tag. Returns the current element
* state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
*
+ * @param ignorable
+ * Whether the content consists entirely of ignorable whitespace.
+ *
* @return The current element state
* @throws IOException An I/O exception occured while
* serializing
*/
- protected ElementState content()
+ protected ElementState content( boolean ignorable )
throws IOException
{
ElementState state;
state = getElementState();
- if ( ! isDocumentState() ) {
+ if ( ! isDocumentState() && (!ignorable || state.preserveSpace)) {
// Need to close CData section first
if ( state.inCData && ! state.doCData ) {
_printer.printText( "]]>" );
@@ -1343,8 +1362,8 @@
throws IOException
{
ElementState state;
-
- state = content();
+ boolean ignorable = text.trim().length()==0;
+ state = content(ignorable);
// Check if text should be print as CDATA section or unescaped
// based on elements listed in the output format (the element
// state) or whether we are inside a CDATA section or entity.
@@ -1379,7 +1398,11 @@
printText( text, true, state.unescaped );
_printer.setNextIndent( saveIndent );
} else {
- printText( text, false, state.unescaped );
+ // if the string is whitespace only and we are indenting
+ // these characters are probably just for indentation.
+ // we will handle indentation by ourselves, so don't print it.
+ if( !ignorable || !_indenting )
+ printText( text, false, state.unescaped );
}
}
}
@@ -1514,7 +1537,7 @@
else {
// REVISIT: For XML 1.1 should we perform extra checks here?
// Should it be serialized as entity reference?
- if (content().inCData ) {
+ if (content(false).inCData ) {
_printer.printText("]]>&#x");
_printer.printText(Integer.toHexString(supplemental));
_printer.printText(";<![CDATA[");
1.23.2.1 +3 -3 xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java
Index: HTMLSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java,v
retrieving revision 1.23
retrieving revision 1.23.2.1
diff -u -r1.23 -r1.23.2.1
--- HTMLSerializer.java 23 Sep 2003 21:42:31 -0000 1.23
+++ HTMLSerializer.java 9 Dec 2003 11:08:23 -0000 1.23.2.1
@@ -488,7 +488,7 @@
try {
// HTML: no CDATA section
- state = content();
+ state = content(isIgnorable(chars,start,length));
state.doCData = false;
super.characters( chars, start, length );
} catch ( IOException except ) {
@@ -860,7 +860,7 @@
ElementState state;
// HTML: no CDATA section
- state = content();
+ state = content(text.trim().length()==0);
super.characters( text );
}
1.8.6.1 +17 -16 xml-xerces/java/src/org/apache/xml/serialize/IndentPrinter.java
Index: IndentPrinter.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/IndentPrinter.java,v
retrieving revision 1.8
retrieving revision 1.8.6.1
diff -u -r1.8 -r1.8.6.1
--- IndentPrinter.java 13 Jan 2003 15:59:09 -0000 1.8
+++ IndentPrinter.java 9 Dec 2003 11:08:23 -0000 1.8.6.1
@@ -207,8 +207,15 @@
* separator will be counted. If the line accumulated so far is
* long enough, it will be printed.
*/
- public void printSpace()
- {
+ public void printSpace() {
+ printSpace(1);
+ }
+
+ /**
+ * Prints a space <tt>width</tt> times, which may be broken into
+ * separate lines.
+ */
+ private void printSpace(int width) {
// The line consists of the text accumulated in _line,
// followed by one or more spaces as counted by _spaces,
// followed by more space accumulated in _text:
@@ -253,7 +260,7 @@
}
// Starting a new word: accumulate the text between the line
// and this new word; not a new word: just add another space.
- ++_spaces;
+ _spaces += width;
}
@@ -272,16 +279,12 @@
public void breakLine( boolean preserveSpace )
{
- // Equivalent to calling printSpace and forcing a flushLine.
- if ( _text.length() > 0 ) {
- while ( _spaces > 0 ) {
- _line.append( ' ' );
- --_spaces;
- }
- _line.append( _text );
- _text = new StringBuffer( 20 );
- }
+ // let the proper word wrapping happen between
+ // _line and _text
+ printSpace(0);
flushLine( preserveSpace );
+ _thisIndent = _nextIndent;
+
try {
// Print line and new line, then zero the line contents.
_writer.write( _format.getLineSeparator() );
@@ -298,7 +301,7 @@
* Flushes the line accumulated so far to the writer and get ready
* to accumulate the next line. This method is called by {@link
* #printText} and {@link #printSpace} when the accumulated line plus
- * accumulated text are two long to fit on a given line. At the end of
+ * accumulated text are too long to fit on a given line. At the end of
* this method _line is empty and _spaces is zero.
*/
public void flushLine( boolean preserveSpace )
@@ -320,8 +323,6 @@
--indent;
}
}
- _thisIndent = _nextIndent;
-
// There is no need to print the spaces at the end of the line,
// they are simply stripped and replaced with a single line
// separator.
1.7.2.1 +3 -3 xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java
Index: XML11Serializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java,v
retrieving revision 1.7
retrieving revision 1.7.2.1
diff -u -r1.7 -r1.7.2.1
--- XML11Serializer.java 18 Nov 2003 22:59:50 -0000 1.7
+++ XML11Serializer.java 9 Dec 2003 11:08:23 -0000 1.7.2.1
@@ -224,7 +224,7 @@
ElementState state;
try {
- state = content();
+ state = content(isIgnorable(chars,start,length));
// Check if text should be print as CDATA section or unescaped
// based on elements listed in the output format (the element
@@ -422,7 +422,7 @@
fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
}
else {
- if (content().inCData ) {
+ if (content(false).inCData ) {
_printer.printText("]]>&#x");
_printer.printText(Integer.toHexString(supplemental));
_printer.printText(";<![CDATA[");
1.55.2.1 +4 -2 xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
Index: XMLSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v
retrieving revision 1.55
retrieving revision 1.55.2.1
diff -u -r1.55 -r1.55.2.1
--- XMLSerializer.java 18 Nov 2003 22:59:50 -0000 1.55
+++ XMLSerializer.java 9 Dec 2003 11:08:23 -0000 1.55.2.1
@@ -1386,7 +1386,9 @@
}
continue;
}
- if ( unescaped )
+ if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
+ _printer.printSpace();
+ else if ( unescaped )
_printer.printText( ch );
else
printXMLChar( ch );
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org